git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Revert "crush: add multistep retry rules" 55331/head
author Samuel Just <rexludorum@gmail.com>
Fri, 26 Jan 2024 20:23:03 +0000 (12:23 -0800)
committer Samuel Just <sjust@redhat.com>
Fri, 26 Jan 2024 20:32:05 +0000 (20:32 +0000)
This PR was merged by accident before it was ready.
Let's revert for now and open a new PR.

Signed-off-by: Samuel Just <sjust@redhat.com>
20 files changed:
doc/rados/operations/crush-map-edits.rst
doc/rados/operations/crush-map.rst
qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml
qa/tasks/mgr/dashboard/test_erasure_code_profile.py
src/crush/CrushCompiler.cc
src/crush/CrushWrapper.cc
src/crush/CrushWrapper.h
src/crush/crush.h
src/crush/grammar.h
src/crush/mapper.c
src/crush/mapper.h
src/erasure-code/ErasureCode.cc
src/erasure-code/ErasureCode.h
src/include/ceph_features.h
src/mon/OSDMonitor.cc
src/osd/OSDMap.cc
src/test/cli/crushtool/choose-args.t
src/test/cli/osdmaptool/crush.t
src/test/crush/crush.cc
src/vstart.sh

index 22e7e2f3772f33ecb01cfb1996b7ab2146c2a9c2..46a4a4f74e873896d03b037ed310c54faac03a97 100644 (file)
@@ -419,7 +419,7 @@ centers for three-way replication, and yet another rule for erasure coding acros
 six storage devices. For a detailed discussion of CRUSH rules, see **Section 3.2**
 of `CRUSH - Controlled, Scalable, Decentralized Placement of Replicated Data`_.
 
-A normal CRUSH rule takes the following form::
+A rule takes the following form::
 
     rule <rulename> {
 
@@ -430,18 +430,6 @@ A normal CRUSH rule takes the following form::
         step emit
     }
 
-CRUSH MSR rules are a distinct type of CRUSH rule which supports retrying steps
-and provides better support for configurations that require multiple OSDs within
-each failure domain.  MSR rules take the following form::
-
-    rule <rulename> {
-
-        id [a unique integer ID]
-        type [msr_indep|msr_firsn]
-        step take <bucket-name> [class <device-class>]
-        step choosemsr <N> type <bucket-type>
-        step emit
-    }
 
 ``id``
    :Description: A unique integer that identifies the rule.
@@ -453,14 +441,12 @@ each failure domain.  MSR rules take the following form::
 
 ``type``
    :Description: Denotes the type of replication strategy to be enforced by the
-                 rule.  msr_firstn and msr_indep are a distinct descent algorithm
-                which supports retrying steps within the rule and therefore
-                multiple OSDs per failure domain.
+                 rule.
    :Purpose: A component of the rule mask.
    :Type: String
    :Required: Yes
    :Default: ``replicated``
-   :Valid Values: ``replicated``, ``erasure``, ``msr_firstn``, ``msr_indep``
+   :Valid Values: ``replicated`` or ``erasure``
 
 
 ``step take <bucket-name> [class <device-class>]``
@@ -539,16 +525,6 @@ each failure domain.  MSR rules take the following form::
                  final CRUSH mapping transformation is therefore 1, 2, 3, 4, 5
                  → 1, 2, 6, 4, 5.
 
-``step choosemsr {num} type {bucket-type}``
-   :Description: Selects a num buckets of type bucket-type.  msr_firstn and msr_indep
-                must use choosemsr rather than choose or chooseleaf.
-
-                 - If ``{num} == 0``, choose ``pool-num-replicas`` buckets (as many buckets as are available).
-                 - If ``pool-num-replicas > {num} > 0``, choose that many buckets.
-   :Purpose: Choose step required for msr_firstn and msr_indep rules.
-   :Prerequisite: Follows ``step take`` and precedes ``step emit``
-   :Example: ``step choosemsr 3 type host``
-
 .. _crush-reclassify:
 
 Migrating from a legacy SSD rule to device classes
index e18d593253d9dd0ef8c980fbee73f401cad50991..39151e6d4a766d0c15b85cf8a1d0d25e7f6d3e52 100644 (file)
@@ -709,13 +709,6 @@ The relevant erasure-code profile properties are as follows:
    [default: ``default``].
  * **crush-failure-domain**: the CRUSH bucket type used in the distribution of
    erasure-coded shards [default: ``host``].
- * **crush-osds-per-failure-domain**: Maximum number of OSDs to place in each
-   failure domain -- defaults to 1.  Using a value greater than one will
-   cause a CRUSH MSR rule to be created, see below.  Must be specified if
-   crush-num-failure-domains is specified.
- * **crush-num-failure-domains**: Number of failure domains to map.  Must be
-   specified if crush-osds-per-failure-domain is specified.  Results in
-   a CRUSH MSR rule being created.
  * **crush-device-class**: the device class on which to place data [default:
    none, which means that all devices are used].
  * **k** and **m** (and, for the ``lrc`` plugin, **l**): these determine the
@@ -733,21 +726,6 @@ The relevant erasure-code profile properties are as follows:
    argument is omitted, then Ceph will create the CRUSH rule automatically.
 
 
-CRUSH MSR Rules
----------------
-
-Creating an erasure-code profile with a crush-osds-per-failure-domain
-value greater than one will cause a CRUSH MSR rule type to be created
-instead of a normal CRUSH rule.  Normal crush rules cannot retry prior
-steps when an out OSD is encountered and rely on CHOOSELEAF steps to
-permit moving OSDs to new hosts.  However, CHOOSELEAF rules don't
-support more than a single OSD per failure domain.  MSR rules, new in
-squid, support multiple OSDs per failure domain by retrying all prior
-steps when an out OSD is encountered.  Using MSR rules requires that
-OSDs and clients be required to support the CRUSH_MSR feature bit
-(squid or newer).
-
-
 Deleting rules
 --------------
 
index a0cd68a55f534595727cbfb78d544b5f12071d80..dfcc61607a7d0f09177eca52eb2e3306097dd883 100644 (file)
@@ -11,9 +11,7 @@ tasks:
       k: 4
       m: 2
       technique: reed_sol_van
-      crush-failure-domain: host
-      crush-osds-per-failure-domain: 2
-      crush-num-failure-domains: 3
+      crush-failure-domain: osd
     op_weights:
       read: 100
       write: 0
index a50914008934369b1ea440e7cb721dffd9336351..7fb7c1c8270fa4022542898d33b661fc10e698e0 100644 (file)
@@ -79,7 +79,7 @@ class ECPTest(DashboardTestCase):
         self.assertStatus(201)
 
         self._get('/api/erasure_code_profile/lrc')
-        self.assertJsonSubset({
+        self.assertJsonBody({
             'crush-device-class': '',
             'crush-failure-domain': 'host',
             'crush-root': 'default',
index c884caed00e6bec4d8cdb2e220cb5f7277ad1477..5e51aad8dba492cd61060547760ea7d76f87e1dc 100644 (file)
@@ -321,13 +321,6 @@ int CrushCompiler::decompile(ostream &out)
   if (crush.get_allowed_bucket_algs() != CRUSH_LEGACY_ALLOWED_BUCKET_ALGS)
     out << "tunable allowed_bucket_algs " << crush.get_allowed_bucket_algs()
        << "\n";
-  if (crush.has_nondefault_tunables_msr()) {
-    out << "tunable msr_descents " << crush.get_msr_descents()
-       << "\n";
-    out << "tunable msr_collision_tries "
-       << crush.get_msr_collision_tries()
-       << "\n";
-  }
 
   out << "\n# devices\n";
   for (int i=0; i<crush.get_max_devices(); i++) {
@@ -370,18 +363,12 @@ int CrushCompiler::decompile(ostream &out)
     out << "\tid " << i << "\n";
 
     switch (crush.get_rule_type(i)) {
-    case CRUSH_RULE_TYPE_REPLICATED:
+    case CEPH_PG_TYPE_REPLICATED:
       out << "\ttype replicated\n";
       break;
-    case CRUSH_RULE_TYPE_ERASURE:
+    case CEPH_PG_TYPE_ERASURE:
       out << "\ttype erasure\n";
       break;
-    case CRUSH_RULE_TYPE_MSR_FIRSTN:
-      out << "\ttype msr_firstn\n";
-      break;
-    case CRUSH_RULE_TYPE_MSR_INDEP:
-      out << "\ttype msr_indep\n";
-      break;
     default:
       out << "\ttype " << crush.get_rule_type(i) << "\n";
     }
@@ -435,15 +422,6 @@ int CrushCompiler::decompile(ostream &out)
        out << "\tstep set_chooseleaf_stable " << crush.get_rule_arg1(i, j)
            << "\n";
        break;
-      case CRUSH_RULE_SET_MSR_DESCENTS:
-       out << "\tstep set_msr_descents " << crush.get_rule_arg1(i, j)
-           << "\n";
-       break;
-      case CRUSH_RULE_SET_MSR_COLLISION_TRIES:
-       out << "\tstep set_msr_collision_tries "
-           << crush.get_rule_arg1(i, j)
-           << "\n";
-       break;
       case CRUSH_RULE_CHOOSE_FIRSTN:
        out << "\tstep choose firstn "
            << crush.get_rule_arg1(i, j) 
@@ -472,13 +450,6 @@ int CrushCompiler::decompile(ostream &out)
        print_type_name(out, crush.get_rule_arg2(i, j), crush);
        out << "\n";
        break;
-      case CRUSH_RULE_CHOOSE_MSR:
-       out << "\tstep choosemsr "
-           << crush.get_rule_arg1(i, j) 
-           << " type ";
-       print_type_name(out, crush.get_rule_arg2(i, j), crush);
-       out << "\n";
-       break;
       }
     }
     out << "}\n";
@@ -561,10 +532,6 @@ int CrushCompiler::parse_tunable(iter_t const& i)
     crush.set_straw_calc_version(val);
   else if (name == "allowed_bucket_algs")
     crush.set_allowed_bucket_algs(val);
-  else if (name == "msr_descents")
-    crush.set_msr_descents(val);
-  else if (name == "msr_collision_tries")
-    crush.set_msr_collision_tries(val);
   else {
     err << "tunable " << name << " not recognized" << std::endl;
     return -1;
@@ -814,13 +781,9 @@ int CrushCompiler::parse_rule(iter_t const& i)
   string tname = string_node(i->children[start+2]);
   int type;
   if (tname == "replicated")
-    type = CRUSH_RULE_TYPE_REPLICATED;
+    type = CEPH_PG_TYPE_REPLICATED;
   else if (tname == "erasure")
-    type = CRUSH_RULE_TYPE_ERASURE;
-  else if (tname == "msr_firstn")
-    type = CRUSH_RULE_TYPE_MSR_FIRSTN;
-  else if (tname == "msr_indep")
-    type = CRUSH_RULE_TYPE_MSR_INDEP;
+    type = CEPH_PG_TYPE_ERASURE;
   else 
     ceph_abort();
 
@@ -942,18 +905,6 @@ int CrushCompiler::parse_rule(iter_t const& i)
        crush.set_rule_step_set_chooseleaf_stable(ruleno, step++, val);
       }
       break;
-    case crush_grammar::_step_set_msr_descents:
-      {
-       int val = int_node(s->children[1]);
-       crush.set_rule_step_set_msr_descents(ruleno, step++, val);
-      }
-      break;
-    case crush_grammar::_step_set_msr_collision_tries:
-      {
-       int val = int_node(s->children[1]);
-       crush.set_rule_step_set_msr_collision_tries(ruleno, step++, val);
-      }
-      break;
 
     case crush_grammar::_step_choose:
     case crush_grammar::_step_chooseleaf:
@@ -981,17 +932,6 @@ int CrushCompiler::parse_rule(iter_t const& i)
       }
       break;
 
-    case crush_grammar::_step_choose_msr:
-      {
-       string type = string_node(s->children[3]);
-       if (!type_id.count(type)) {
-         err << "in rule '" << rname << "' type '" << type << "' not defined" << std::endl;
-         return -1;
-       }
-       crush.set_rule_step_choose_msr(ruleno, step++, int_node(s->children[1]), type_id[type]);
-      }
-      break;
-
     case crush_grammar::_step_emit:
       crush.set_rule_step_emit(ruleno, step++);
       break;
index 4850e36f9b5cbec979e1ffe9f0772419061b0825..0f40e6875e1be84ac164d6e8697f714e95003fb9 100644 (file)
@@ -135,29 +135,6 @@ bool CrushWrapper::is_v5_rule(unsigned ruleid) const
   return false;
 }
 
-bool CrushWrapper::has_msr_rules() const
-{
-  for (unsigned i=0; i<crush->max_rules; i++) {
-    if (is_msr_rule(i)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool CrushWrapper::is_msr_rule(unsigned ruleid) const
-{
-  if (ruleid >= crush->max_rules)
-    return false;
-  
-  crush_rule *r = crush->rules[ruleid];
-  if (!r)
-    return false;
-
-  return r->type == CRUSH_RULE_TYPE_MSR_INDEP ||
-    r->type == CRUSH_RULE_TYPE_MSR_FIRSTN;
-}
-
 bool CrushWrapper::has_choose_args() const
 {
   return !choose_args.empty();
@@ -2261,7 +2238,6 @@ void CrushWrapper::reweight_bucket(
 int CrushWrapper::add_simple_rule_at(
   string name, string root_name,
   string failure_domain_name,
-  int num_failure_domains,
   string device_class,
   string mode, int rule_type,
   int rno,
@@ -2333,19 +2309,17 @@ int CrushWrapper::add_simple_rule_at(
   }
   crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
   if (type)
-    crush_rule_set_step(
-      rule, step++,
-      mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
-      CRUSH_RULE_CHOOSELEAF_INDEP,
-      num_failure_domains <= 0 ? CRUSH_CHOOSE_N : num_failure_domains,
-      type);
+    crush_rule_set_step(rule, step++,
+                       mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
+                       CRUSH_RULE_CHOOSELEAF_INDEP,
+                       CRUSH_CHOOSE_N,
+                       type);
   else
-    crush_rule_set_step(
-      rule, step++,
-      mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
-      CRUSH_RULE_CHOOSE_INDEP,
-      num_failure_domains <= 0 ? CRUSH_CHOOSE_N : num_failure_domains,
-      0);
+    crush_rule_set_step(rule, step++,
+                       mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
+                       CRUSH_RULE_CHOOSE_INDEP,
+                       CRUSH_CHOOSE_N,
+                       0);
   crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
 
   int ret = crush_add_rule(crush, rule, rno);
@@ -2361,125 +2335,13 @@ int CrushWrapper::add_simple_rule_at(
 int CrushWrapper::add_simple_rule(
   string name, string root_name,
   string failure_domain_name,
-  int num_failure_domains,
   string device_class,
   string mode, int rule_type,
   ostream *err)
 {
-  return add_simple_rule_at(
-    name, root_name, failure_domain_name, num_failure_domains,
-    device_class,
-    mode,
-    rule_type, -1, err);
-}
-
-int CrushWrapper::add_multi_osd_per_failure_domain_rule_at(
-  string name, string root_name, string failure_domain_name,
-  int num_failure_domains,
-  int osds_per_failure_domain,
-  string device_class,
-  crush_rule_type rule_type,
-  int rno,
-  ostream *err)
-{
-  if (rule_exists(name)) {
-    if (err)
-      *err << "rule " << name << " exists";
-    return -EEXIST;
-  }
-  if (rno >= 0) {
-    if (rule_exists(rno)) {
-      if (err)
-        *err << "rule with ruleno " << rno << " exists";
-      return -EEXIST;
-    }
-  } else {
-    for (rno = 0; rno < get_max_rules(); rno++) {
-      if (!rule_exists(rno))
-        break;
-    }
-  }
-  if (!name_exists(root_name)) {
-    if (err)
-      *err << "root item " << root_name << " does not exist";
-    return -ENOENT;
-  }
-  int root = get_item_id(root_name);
-  int type = 0;
-  if (failure_domain_name.length()) {
-    type = get_type_id(failure_domain_name);
-    if (type < 0) {
-      if (err)
-       *err << "unknown type " << failure_domain_name;
-      return -EINVAL;
-    }
-  }
-  if (device_class.size()) {
-    if (!class_exists(device_class)) {
-      if (err)
-       *err << "device class " << device_class << " does not exist";
-      return -EINVAL;
-    }
-    int c = get_class_id(device_class);
-    if (class_bucket.count(root) == 0 ||
-       class_bucket[root].count(c) == 0) {
-      if (err)
-       *err << "root " << root_name << " has no devices with class "
-            << device_class;
-      return -EINVAL;
-    }
-    root = class_bucket[root][c];
-  }
-  if (rule_type != CRUSH_RULE_TYPE_MSR_INDEP &&
-      rule_type != CRUSH_RULE_TYPE_MSR_FIRSTN) {
-    if (err)
-      *err << "unknown rule_type " << rule_type;
-    return -EINVAL;
-  }
-
-  int steps = 4;
-  crush_rule *rule = crush_make_rule(steps, rule_type);
-  ceph_assert(rule);
-  int step = 0;
-  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
-  crush_rule_set_step(rule, step++,
-                     CRUSH_RULE_CHOOSE_MSR,
-                     num_failure_domains,
-                     type);
-  crush_rule_set_step(rule, step++,
-                     CRUSH_RULE_CHOOSE_MSR,
-                     osds_per_failure_domain,
-                     0);
-  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
-
-  int ret = crush_add_rule(crush, rule, rno);
-  if(ret < 0) {
-    *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
-    return ret;
-  }
-  set_rule_name(rno, name);
-  have_rmaps = false;
-  return rno;
-}
-
-
-int CrushWrapper::add_indep_multi_osd_per_failure_domain_rule(
-  string name, string root_name,
-  string failure_domain_name,
-  int num_failure_domains,
-  int osds_per_failure_domain,
-  string device_class,
-  ostream *err)
-{
-  return add_multi_osd_per_failure_domain_rule_at(
-    name, root_name,
-    failure_domain_name,
-    num_failure_domains,
-    osds_per_failure_domain,
-    device_class,
-    CRUSH_RULE_TYPE_MSR_INDEP,
-    -1,
-    err);
+  return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
+                           mode,
+                           rule_type, -1, err);
 }
 
 float CrushWrapper::_get_take_weight_osd_map(int root,
@@ -3218,10 +3080,6 @@ void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
       }
     }
   }
-  if (HAVE_FEATURE(features, CRUSH_MSR)) {
-    encode(crush->msr_descents, bl);
-    encode(crush->msr_collision_tries, bl);
-  }
 }
 
 static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::const_iterator& blp)
@@ -3372,12 +3230,6 @@ void CrushWrapper::decode(bufferlist::const_iterator& blp)
        choose_args[choose_args_index] = arg_map;
       }
     }
-    if (!blp.end()) {
-      decode(crush->msr_descents, blp);
-      decode(crush->msr_collision_tries, blp);
-    } else {
-      set_default_msr_tunables();
-    }
     update_choose_args(nullptr); // in case we decode a legacy "corrupted" map
     finalize();
   }
@@ -3633,8 +3485,6 @@ void CrushWrapper::dump_tunables(Formatter *f) const
   f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
   f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
   f->dump_int("chooseleaf_stable", get_chooseleaf_stable());
-  f->dump_int("msr_descents", get_msr_descents());
-  f->dump_int("msr_collision_tries", get_msr_collision_tries());
   f->dump_int("straw_calc_version", get_straw_calc_version());
   f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
 
@@ -3665,7 +3515,6 @@ void CrushWrapper::dump_tunables(Formatter *f) const
   f->dump_int("has_v4_buckets", (int)has_v4_buckets());
   f->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
   f->dump_int("has_v5_rules", (int)has_v5_rules());
-  f->dump_int("has_msr_rules", (int)has_msr_rules());
 }
 
 void CrushWrapper::dump_choose_args(Formatter *f) const
@@ -3764,11 +3613,6 @@ void CrushWrapper::dump_rule(int rule_id, Formatter *f) const
       f->dump_int("num", get_rule_arg1(rule_id, j));
       f->dump_string("type", get_type_name(get_rule_arg2(rule_id, j)));
       break;
-    case CRUSH_RULE_CHOOSE_MSR:
-      f->dump_string("op", "choosemsr");
-      f->dump_int("num", get_rule_arg1(rule_id, j));
-      f->dump_string("type", get_type_name(get_rule_arg2(rule_id, j)));
-      break;
     case CRUSH_RULE_SET_CHOOSE_TRIES:
       f->dump_string("op", "set_choose_tries");
       f->dump_int("num", get_rule_arg1(rule_id, j));
@@ -3777,14 +3621,6 @@ void CrushWrapper::dump_rule(int rule_id, Formatter *f) const
       f->dump_string("op", "set_chooseleaf_tries");
       f->dump_int("num", get_rule_arg1(rule_id, j));
       break;
-    case CRUSH_RULE_SET_MSR_DESCENTS:
-      f->dump_string("op", "set_msr_descents");
-      f->dump_int("num", get_rule_arg1(rule_id, j));
-      break;
-    case CRUSH_RULE_SET_MSR_COLLISION_TRIES:
-      f->dump_string("op", "set_msr_collision_tries");
-      f->dump_int("num", get_rule_arg1(rule_id, j));
-      break;
     default:
       f->dump_int("opcode", get_rule_op(rule_id, j));
       f->dump_int("arg1", get_rule_arg1(rule_id, j));
index 317f4c28bdd6f101cad19350461d94c52d95fa55..b8caa24ce621c989ec70820f59c3245cbc033636 100644 (file)
@@ -125,7 +125,6 @@ public:
     crush->chooseleaf_vary_r = 0;
     crush->chooseleaf_stable = 0;
     crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
-    set_default_msr_tunables();
   }
   void set_tunables_bobtail() {
     crush->choose_local_tries = 0;
@@ -135,7 +134,6 @@ public:
     crush->chooseleaf_vary_r = 0;
     crush->chooseleaf_stable = 0;
     crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
-    set_default_msr_tunables();
   }
   void set_tunables_firefly() {
     crush->choose_local_tries = 0;
@@ -145,7 +143,6 @@ public:
     crush->chooseleaf_vary_r = 1;
     crush->chooseleaf_stable = 0;
     crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
-    set_default_msr_tunables();
   }
   void set_tunables_hammer() {
     crush->choose_local_tries = 0;
@@ -159,7 +156,6 @@ public:
       (1 << CRUSH_BUCKET_LIST) |
       (1 << CRUSH_BUCKET_STRAW) |
       (1 << CRUSH_BUCKET_STRAW2);
-    set_default_msr_tunables();
   }
   void set_tunables_jewel() {
     crush->choose_local_tries = 0;
@@ -173,7 +169,6 @@ public:
       (1 << CRUSH_BUCKET_LIST) |
       (1 << CRUSH_BUCKET_STRAW) |
       (1 << CRUSH_BUCKET_STRAW2);
-    set_default_msr_tunables();
   }
 
   void set_tunables_legacy() {
@@ -238,24 +233,6 @@ public:
     crush->straw_calc_version = n;
   }
 
-  int get_msr_descents() const {
-    return crush->msr_descents;
-  }
-  void set_msr_descents(int n) {
-    crush->msr_descents = n;
-  }
-
-  int get_msr_collision_tries() const {
-    return crush->msr_collision_tries;
-  }
-  void set_msr_collision_tries(int n) {
-    crush->msr_collision_tries = n;
-  }
-  void set_default_msr_tunables() {
-    set_msr_descents(100);
-    set_msr_collision_tries(100);
-  }
-
   unsigned get_allowed_bucket_algs() const {
     return crush->allowed_bucket_algs;
   }
@@ -271,8 +248,7 @@ public:
       crush->chooseleaf_descend_once == 0 &&
       crush->chooseleaf_vary_r == 0 &&
       crush->chooseleaf_stable == 0 &&
-      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS &&
-      !has_nondefault_tunables_msr();
+      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
   }
   bool has_bobtail_tunables() const {
     return
@@ -282,8 +258,7 @@ public:
       crush->chooseleaf_descend_once == 1 &&
       crush->chooseleaf_vary_r == 0 &&
       crush->chooseleaf_stable == 0 &&
-      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS &&
-      !has_nondefault_tunables_msr();
+      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
   }
   bool has_firefly_tunables() const {
     return
@@ -293,8 +268,7 @@ public:
       crush->chooseleaf_descend_once == 1 &&
       crush->chooseleaf_vary_r == 1 &&
       crush->chooseleaf_stable == 0 &&
-      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS &&
-      !has_nondefault_tunables_msr();
+      crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
   }
   bool has_hammer_tunables() const {
     return
@@ -307,8 +281,7 @@ public:
       crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
                                      (1 << CRUSH_BUCKET_LIST) |
                                      (1 << CRUSH_BUCKET_STRAW) |
-                                     (1 << CRUSH_BUCKET_STRAW2)) &&
-      !has_nondefault_tunables_msr();
+                                     (1 << CRUSH_BUCKET_STRAW2));
   }
   bool has_jewel_tunables() const {
     return
@@ -321,8 +294,7 @@ public:
       crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
                                      (1 << CRUSH_BUCKET_LIST) |
                                      (1 << CRUSH_BUCKET_STRAW) |
-                                     (1 << CRUSH_BUCKET_STRAW2)) &&
-      !has_nondefault_tunables_msr();
+                                     (1 << CRUSH_BUCKET_STRAW2));
   }
 
   bool has_optimal_tunables() const {
@@ -350,11 +322,6 @@ public:
     return
         crush->chooseleaf_stable != 0;
   }
-  bool has_nondefault_tunables_msr() const {
-    return
-      crush->msr_descents != 100 ||
-      crush->msr_collision_tries != 100;
-  }
 
   bool has_v2_rules() const;
   bool has_v3_rules() const;
@@ -362,17 +329,13 @@ public:
   bool has_v5_rules() const;
   bool has_choose_args() const;          // any choose_args
   bool has_incompat_choose_args() const; // choose_args that can't be made compat
-  bool has_msr_rules() const;
 
   bool is_v2_rule(unsigned ruleid) const;
   bool is_v3_rule(unsigned ruleid) const;
   bool is_v5_rule(unsigned ruleid) const;
-  bool is_msr_rule(unsigned ruleid) const;
 
   std::string get_min_required_version() const {
-    if (has_msr_rules() || has_nondefault_tunables_msr())
-      return "squid";
-    else if (has_v5_rules() || has_nondefault_tunables5())
+    if (has_v5_rules() || has_nondefault_tunables5())
       return "jewel";
     else if (has_v4_buckets())
       return "hammer";
@@ -602,21 +565,6 @@ public:
     if (have_rmaps)
       rule_name_rmap[name] = i;
   }
-  bool rule_valid_for_pool_type(int rule_id, int ptype) const {
-    auto rule_type = get_rule_type(rule_id);
-    switch (ptype) {
-    case CEPH_PG_TYPE_REPLICATED:
-      return rule_type == CRUSH_RULE_TYPE_REPLICATED ||
-       rule_type == CRUSH_RULE_TYPE_MSR_FIRSTN;
-    case CEPH_PG_TYPE_ERASURE:
-      return rule_type == CRUSH_RULE_TYPE_ERASURE ||
-       rule_type == CRUSH_RULE_TYPE_MSR_INDEP;
-    default:
-      ceph_assert(0 == "impossible");
-      return false;
-    }
-  }
-
   bool is_shadow_item(int id) const {
     const char *name = get_item_name(id);
     return name && !is_valid_crush_name(name);
@@ -1203,14 +1151,6 @@ public:
   int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
     return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
   }
-
-  int set_rule_step_set_msr_descents(unsigned ruleno, unsigned step, int val) {
-    return set_rule_step(ruleno, step, CRUSH_RULE_SET_MSR_DESCENTS, val, 0);
-  }
-  int set_rule_step_set_msr_collision_tries(unsigned ruleno, unsigned step, int val) {
-    return set_rule_step(ruleno, step, CRUSH_RULE_SET_MSR_COLLISION_TRIES, val, 0);
-  }
-
   int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
     return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
   }
@@ -1223,61 +1163,22 @@ public:
   int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) {
     return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type);
   }
-  int set_rule_step_choose_msr(unsigned ruleno, unsigned step, int val, int type) {
-    return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_MSR, val, type);
-  }
   int set_rule_step_emit(unsigned ruleno, unsigned step) {
     return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
   }
 
   int add_simple_rule(
     std::string name, std::string root_name, std::string failure_domain_type,
-    int num_failure_domains,
     std::string device_class, std::string mode, int rule_type,
     std::ostream *err = 0);
-  int add_simple_rule(
-    std::string name, std::string root_name, std::string failure_domain_type,
-    std::string device_class, std::string mode, int rule_type,
-    std::ostream *err = 0) {
-    return add_simple_rule(
-      name, root_name, failure_domain_type, -1,
-      device_class, mode, rule_type, err);
-  }
-
-  int add_indep_multi_osd_per_failure_domain_rule(
-    std::string name, std::string root_name, std::string failure_domain_type,
-    int osds_per_failure_domain,
-    int num_failure_domains,
-    std::string device_class,
-    std::ostream *err = 0);
 
   /**
    * @param rno rule[set] id to use, -1 to pick the lowest available
    */
   int add_simple_rule_at(
     std::string name, std::string root_name,
-    std::string failure_domain_type,
-    int num_failure_domains,
-    std::string device_class, std::string mode,
+    std::string failure_domain_type, std::string device_class, std::string mode,
     int rule_type, int rno, std::ostream *err = 0);
-  int add_simple_rule_at(
-    std::string name, std::string root_name,
-    std::string failure_domain_type,
-    std::string device_class, std::string mode,
-    int rule_type, int rno, std::ostream *err = 0) {
-    return add_simple_rule_at(
-      name, root_name, failure_domain_type, -1,
-      device_class, mode, rule_type, rno, err);
-  }
-
-  int add_multi_osd_per_failure_domain_rule_at(
-    std::string name, std::string root_name, std::string failure_domain_type,
-    int osds_per_failure_domain,
-    int num_failure_domains,
-    std::string device_class,
-    crush_rule_type rule_type,
-    int rno,
-    std::ostream *err = 0);
 
   int remove_rule(int ruleno);
 
index bdcdc97eef2b98eedd19a3e4295178cd1fa5c2b3..fde2df6a8a3ec7cef8c59b253c9122eeea0a0b88 100644 (file)
@@ -65,15 +65,7 @@ enum crush_opcodes {
        CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
        CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
        CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
-       CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13,
-
-       /* set choose_msr_total_tries */
-       CRUSH_RULE_SET_MSR_DESCENTS = 14,
-       /* set choose_msr_local_collision_tries */
-       CRUSH_RULE_SET_MSR_COLLISION_TRIES = 15,
-
-       /* choose variant without FIRSTN|INDEP */
-       CRUSH_RULE_CHOOSE_MSR = 16
+       CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
 };
 
 /*
@@ -95,12 +87,7 @@ struct crush_rule {
 #define crush_rule_size(len) (sizeof(struct crush_rule) + \
                              (len)*sizeof(struct crush_rule_step))
 
-enum crush_rule_type {
-       CRUSH_RULE_TYPE_REPLICATED = 1,
-       CRUSH_RULE_TYPE_ERASURE = 3,
-       CRUSH_RULE_TYPE_MSR_FIRSTN = 4,
-       CRUSH_RULE_TYPE_MSR_INDEP = 5
-};
+
 
 /*
  * A bucket is a named container of other items (either devices or
@@ -423,12 +410,6 @@ struct crush_map {
          */
        __u8 chooseleaf_stable;
 
-       /*! Sets total descents for MSR rules */
-       __u8 msr_descents;
-
-       /*! Sets local collision retries for MSR rules */
-       __u8 msr_collision_tries;
-
         /*! @cond INTERNAL */
        /* This value is calculated after decode or construction by
           the builder. It is exposed here (rather than having a
index 0c9a2da7d77069ea2577e0388c76cffa4def0001..b5c356a49e17e8bafc9c18188a6fecf08e643b72 100644 (file)
@@ -50,11 +50,8 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
     _step_set_choose_tries,
     _step_set_choose_local_tries,
     _step_set_choose_local_fallback_tries,
-    _step_set_msr_descents,
-    _step_set_msr_collision_tries,
     _step_choose,
     _step_chooseleaf,
-    _step_choose_msr,
     _step_emit,
     _step,
     _crushrule,
@@ -94,11 +91,8 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_chooseleaf_tries> >    step_set_chooseleaf_tries;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_chooseleaf_vary_r> >    step_set_chooseleaf_vary_r;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_chooseleaf_stable> >    step_set_chooseleaf_stable;
-    boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_msr_descents> >    step_set_msr_descents;
-    boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_set_msr_collision_tries> >    step_set_msr_collision_tries;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_choose> >    step_choose;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_chooseleaf> >      step_chooseleaf;
-    boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_choose_msr> >      step_choose_msr;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step_emit> >      step_emit;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_step> >      step;
     boost::spirit::rule<ScannerT, boost::spirit::parser_context<>, boost::spirit::parser_tag<_crushrule> >      crushrule;
@@ -155,8 +149,6 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
       step_set_chooseleaf_tries = str_p("set_chooseleaf_tries") >> posint;
       step_set_chooseleaf_vary_r = str_p("set_chooseleaf_vary_r") >> posint;
       step_set_chooseleaf_stable = str_p("set_chooseleaf_stable") >> posint;
-      step_set_msr_descents = str_p("set_msr_descents") >> posint;
-      step_set_msr_collision_tries = str_p("set_msr_collision_tries") >> posint;
       step_choose = str_p("choose")
        >> ( str_p("indep") | str_p("firstn") )
        >> integer
@@ -165,9 +157,6 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
        >> ( str_p("indep") | str_p("firstn") )
        >> integer
        >> str_p("type") >> name;
-      step_choose_msr = str_p("choosemsr")
-       >> integer
-       >> str_p("type") >> name;
       step_emit = str_p("emit");
       step = str_p("step") >> ( step_take |
                                step_set_choose_tries |
@@ -176,15 +165,12 @@ struct crush_grammar : public boost::spirit::grammar<crush_grammar>
                                step_set_chooseleaf_tries |
                                step_set_chooseleaf_vary_r |
                                step_set_chooseleaf_stable |
-                               step_set_msr_descents |
-                               step_set_msr_collision_tries |
                                step_choose |
                                step_chooseleaf |
-                               step_choose_msr |
                                step_emit );
       crushrule = str_p("rule") >> !name >> '{'
                                >> (str_p("id") | str_p("ruleset")) >> posint
-                               >> str_p("type") >> ( str_p("replicated") | str_p("erasure") | str_p("msr_firstn") | str_p("msr_indep") )
+                               >> str_p("type") >> ( str_p("replicated") | str_p("erasure") )
                                >> !(str_p("min_size") >> posint)
                                >> !(str_p("max_size") >> posint)
                           >> +step
index afeaffc5a8d83ba28df7671fe9e490998c4a64fb..736cc6162c9747cbab80dcf5b9413a72d7c31f86 100644 (file)
@@ -27,9 +27,6 @@
 
 #define dprintk(args...) /* printf(args) */
 
-#define MIN(x, y) ((x) > (y) ? (y) : (x))
-#define MAX(y, x) ((x) < (y) ? (y) : (x))
-
 /*
  * Implement the core CRUSH mapping algorithm.
  */
@@ -823,11 +820,65 @@ static void crush_choose_indep(const struct crush_map *map,
 #endif
 }
 
-static int crush_do_rule_no_retry(
-       const struct crush_map *map,
-       int ruleno, int x, int *result, int result_max,
-       const __u32 *weight, int weight_max,
-       void *cwin, const struct crush_choose_arg *choose_args)
+
+/* This takes a chunk of memory and sets it up to be a shiny new
+   working area for a CRUSH placement computation. It must be called
+   on any newly allocated memory before passing it in to
+   crush_do_rule. It may be used repeatedly after that, so long as the
+   map has not changed. If the map /has/ changed, you must make sure
+   the working size is no smaller than what was allocated and re-run
+   crush_init_workspace.
+
+   If you do retain the working space between calls to crush, make it
+   thread-local. If you reinstitute the locking I've spent so much
+   time getting rid of, I will be very unhappy with you. */
+
+void crush_init_workspace(const struct crush_map *m, void *v) {
+       /* We work by moving through the available space and setting
+          values and pointers as we go.
+
+          It's a bit like Forth's use of the 'allot' word since we
+          set the pointer first and then reserve the space for it to
+          point to by incrementing the point. */
+       struct crush_work *w = (struct crush_work *)v;
+       char *point = (char *)v;
+       __s32 b;
+       point += sizeof(struct crush_work);
+       w->work = (struct crush_work_bucket **)point;
+       point += m->max_buckets * sizeof(struct crush_work_bucket *);
+       for (b = 0; b < m->max_buckets; ++b) {
+               if (m->buckets[b] == 0)
+                       continue;
+
+               w->work[b] = (struct crush_work_bucket *) point;
+               switch (m->buckets[b]->alg) {
+               default:
+                       point += sizeof(struct crush_work_bucket);
+                       break;
+               }
+               w->work[b]->perm_x = 0;
+               w->work[b]->perm_n = 0;
+               w->work[b]->perm = (__u32 *)point;
+               point += m->buckets[b]->size * sizeof(__u32);
+       }
+       BUG_ON((char *)point - (char *)w != m->working_size);
+}
+
+/**
+ * crush_do_rule - calculate a mapping with the given input and rule
+ * @map: the crush_map
+ * @ruleno: the rule id
+ * @x: hash input
+ * @result: pointer to result vector
+ * @result_max: maximum result size
+ * @weight: weight vector (for map leaves)
+ * @weight_max: size of weight vector
+ * @cwin: Pointer to at least map->working_size bytes of memory or NULL.
+ */
+int crush_do_rule(const struct crush_map *map,
+                 int ruleno, int x, int *result, int result_max,
+                 const __u32 *weight, int weight_max,
+                 void *cwin, const struct crush_choose_arg *choose_args)
 {
        int result_len;
        struct crush_work *cw = cwin;
@@ -1030,1006 +1081,3 @@ static int crush_do_rule_no_retry(
 
        return result_len;
 }
-
-/// invariant through crush_msr_do_rule invocation
-struct crush_msr_input {
-       const struct crush_map *map;
-       const struct crush_rule *rule;
-       
-       const unsigned result_max;
-       
-       const unsigned weight_len;
-       const __u32 *weights;
-       
-       const int map_input;
-       const struct crush_choose_arg *choose_args;
-       
-       const unsigned msr_descents;
-       const unsigned msr_collision_tries;
-};
-
-/// encapsulates work space, invariant within an EMIT block
-struct crush_msr_workspace {
-       const unsigned start_stepno;
-       const unsigned end_stepno;
-
-       const unsigned result_len;
-
-       const struct crush_work *crush_work;
-
-       // int[end_stepno - start_stepno][result_len]
-       int **step_vecs;
-};
-
-/// encapsulates output space, invariant through crush_msr_do_rule invocation
-struct crush_msr_output {
-       const unsigned result_len;
-       unsigned returned_so_far;
-       int *out;
-};
-
-/**
- * crush_msr_scan_config_steps
- *
- * Scans the possibly empty sequence of CRUSH_RULE_SET_MSR_DESCENTS and
- * CRUSH_RULE_SET_MSR_COLLISION_TRIES steps at the start of the rule.
- * Returns the index of the next step.  Populates *msr_descents and
- * *msr_collision_tries (if non-null) with the last matching step's arg1.
- * @steps: steps to scan
- * @step_len: length of steps
- * @msr_descents: out param for CRUSH_RULE_SET_MSR_DESCENTS
- * @msr_collision_tries: out param for CRUSH_RULE_SET_MSR_COLLISION_TRIES
- */
-static unsigned crush_msr_scan_config_steps(
-       const struct crush_rule_step *steps,
-       unsigned step_len,
-       unsigned *msr_descents,
-       unsigned *msr_collision_tries) {
-       unsigned stepno = 0;
-       for (; stepno < step_len; ++stepno) {
-               const struct crush_rule_step *step = &steps[stepno];
-               switch (step->op) {
-               case CRUSH_RULE_SET_MSR_DESCENTS:
-                       if (msr_descents) *msr_descents = step->arg1;
-                       break;
-               case CRUSH_RULE_SET_MSR_COLLISION_TRIES:
-                       if (msr_collision_tries) *msr_collision_tries = step->arg1;
-                       break;
-               default:
-                       return stepno;
-               }
-       }
-       return stepno;
-}
-
-/// clear workspace represented by *ws
-static void crush_msr_clear_workspace(
-       struct crush_msr_workspace *ws)
-{
-       for (unsigned stepno = ws->start_stepno; stepno < ws->end_stepno;
-            ++stepno) {
-               for (unsigned i = 0; i < ws->result_len; ++i) {
-                       ws->step_vecs[stepno - ws->start_stepno][i] =
-                               CRUSH_ITEM_UNDEF;
-               }
-       }
-}
-
-/**
- * crush_msr_scan_next
- *
- * Validates an EMIT block of the form (TAKE CHOOSE_MSR* EMIT)
- * If sequence is valid, populates total_children with the width
- * of the mapping from the choose steps and next_emit with the
- * index of the next EMIT step.
- *
- * @rule: rule to scan
- * @result_max: max number of results to return
- * @stepno: index of the step at which the block starts (must be a TAKE)
- * @return 0 if valid, -1 if there were validation errors
- */
-static int crush_msr_scan_next(
-       const struct crush_rule *rule,
-       unsigned result_max,
-       unsigned stepno,
-       unsigned *total_children,
-       unsigned *next_emit)
-{
-       if (stepno + 1 >= rule->len) {
-               dprintk("stepno too large\n");
-               return -1;
-       }
-       if (rule->steps[stepno].op != CRUSH_RULE_TAKE) {
-               dprintk("first rule not CRUSH_RULE_TAKE\n");
-               return -1;
-       }
-       ++stepno;
-
-       if (total_children) *total_children = 1;
-       for (; stepno < rule->len; ++stepno) {
-               const struct crush_rule_step *curstep =
-                       &(rule->steps[stepno]);
-               if (curstep->op == CRUSH_RULE_EMIT) {
-                       break;
-               }
-               if (rule->steps[stepno].op != CRUSH_RULE_CHOOSE_MSR) {
-                       dprintk("found non-choose non-emit step %d\n", stepno);
-                       return -1;
-               }
-               if (total_children) {
-                       *total_children *= curstep->arg1 ? curstep->arg1
-                               : result_max;
-               }
-       }
-       if (stepno >= rule->len) {
-               dprintk("did not find emit\n");
-               return -1;
-       }
-       if (next_emit) {
-               *next_emit = stepno;
-       }
-       return 0;
-}
-
-/**
- * crush_msr_scan_rule
- *
- * MSR rules must have the form:
- * 1) Possibly empty sequence of CRUSH_RULE_SET_MSR_{DESCENTS,COLLISION_TRIES} steps
- * 2) A sequence of EMIT blocks of the form
- *   (TAKE CHOOSE_MSR* EMIT)*
- *
- * crush_msr_scan_rule validates that the rule obeys the above form and
- * populates max_steps with the length of the longest sequence of CHOOSE_MSR
- * steps.
- *
- * crush_msr_scan_rule replicates the scan behavior of crush_msr_do_rule.
- *
- * @rule: rule to scan
- * @result_max: max number of results to return
- * @max_steps: length of longest string of choosemsr steps
- * @return 0 if valid, -1 otherwise
- */
-static int crush_msr_scan_rule(
-       const struct crush_rule *rule,
-       unsigned result_max,
-       unsigned *max_steps)
-{
-       if (max_steps) *max_steps = 0;
-       unsigned next_stepno = crush_msr_scan_config_steps(
-               rule->steps,
-               rule->len,
-               NULL, NULL);
-       while (next_stepno < rule->len) {
-               unsigned next_emit_stepno;
-               int r = crush_msr_scan_next(
-                       rule, result_max, next_stepno,
-                       NULL, &next_emit_stepno);
-               if (r < 0) return r;
-
-               if (max_steps) {
-                       *max_steps = MAX(
-                               *max_steps,
-                               next_emit_stepno - (next_stepno + 1));
-               }
-               next_stepno = next_emit_stepno + 1;
-       }
-       return 0;
-}
-
-/// Returns true if all leaf slots in [start, end) are mapped
-static int crush_msr_leaf_vec_populated(
-       const struct crush_msr_workspace *workspace,
-       const unsigned start, const unsigned end)
-{
-       BUG_ON(start >= end);
-       BUG_ON(end > workspace->result_len);
-       BUG_ON(workspace->end_stepno <= workspace->start_stepno);
-       // we check the last step vector here because output
-       // won't be ordered by index for FIRSTN rules
-       int *leaf_vec = workspace->step_vecs[
-         workspace->end_stepno - workspace->start_stepno - 1];
-       for (unsigned i = start; i < end; ++i) {
-               if (leaf_vec[i] == CRUSH_ITEM_UNDEF) {
-                       return 0;
-               }
-       }
-       return 1;
-}
-
-/// Returns try value to pass to crush based on index, tries, and local_tries
-static unsigned crush_msr_get_retry_value(
-       const unsigned result_max,
-       const unsigned index,
-       const unsigned msr_descents,
-       const unsigned msr_collision_tries)
-{
-       const unsigned total_index = (msr_descents * result_max) + index;
-       return (total_index << 16) + msr_collision_tries;
-}
-
-/**
- * crush_msr_descend
- *
- * Descend recursively from bucket until we either hit a leaf or an
- * interior node of type type.
- * @input: crush input information
- * @workspace: struct with working space
- * @bucket: bucket from which to descend
- * @type: target node type
- * @tryno: top level try number, incremented with each call into crush_msr_choose
- *         from crush_msr_do_rule
- * @local_tryno: local collision try number, incremented with each call into
- *               crush_msr_descend from crush_msr_choose after collision
- * @index: mapping index
- */
-static int crush_msr_descend(
-       const struct crush_msr_input *input,
-       const struct crush_msr_workspace *workspace,
-       const struct crush_bucket *bucket,
-       const int type,
-       const unsigned tryno,
-       const unsigned local_tryno,
-       const unsigned index)
-{
-       dprintk(" crush_msr_descend type %d tryno %d local_tryno %d index %d\n",
-               type, tryno, local_tryno, index);
-       while (1) {
-               const int child_bucket_candidate = crush_bucket_choose(
-                       bucket,
-                       workspace->crush_work->work[-1 - bucket->id],
-                       input->map_input,
-                       crush_msr_get_retry_value(
-                               input->result_max,
-                               index, tryno, local_tryno),
-                       (input->choose_args ?
-                        &(input->choose_args[-1 - bucket->id]) : 0),
-                       index);
-
-               if (child_bucket_candidate >= 0) {
-                       return child_bucket_candidate;
-               }
-
-               bucket = input->map->buckets[-1 - child_bucket_candidate];
-               if (bucket->type == type) {
-                       return child_bucket_candidate;
-               }
-       }
-}
-
-/**
- * crush_msr_valid_candidate  
- *
- * Checks whether candidate is a valid choice given buckets already
- * mapped for step stepno.
- * 
- * If candidate has already been mapped for a position in
- * [include_start, include_end), candidate is valid.
- *
- * Else, if candidate has already been mapped for a position in
- * [exclude_start, exclude_end), candidate is invalid.
- *
- * Otherwise, candidate is valid.
- *
- * @stepno: step to check
- * @exclude_start: start of exclusion range
- * @exclude_end: end of exclusion range
- * @include_start: start of inclusion range
- * @include_end: end of inclusion range
- * @candidate: bucket to check
- *
- * Note, [exclude_start, exclude_end) must contain [include_start, include_end).
- */
-static int crush_msr_valid_candidate(
-       const struct crush_msr_workspace *workspace,
-       unsigned stepno,
-       unsigned exclude_start,
-       unsigned exclude_end,
-       unsigned include_start,
-       unsigned include_end,
-       int candidate)
-{
-       BUG_ON(stepno >= workspace->end_stepno);
-       BUG_ON(stepno < workspace->start_stepno);
-
-       BUG_ON(exclude_end <= exclude_start);
-       BUG_ON(include_end <= include_start);
-
-       BUG_ON(exclude_start > include_start);
-       BUG_ON(exclude_end < include_end);
-
-       BUG_ON(exclude_end > workspace->result_len);
-
-       int *vec = workspace->step_vecs[stepno - workspace->start_stepno];
-       for (unsigned i = exclude_start; i < exclude_end; ++i) {
-               if (vec[i] == candidate) {
-                       if (i >= include_start && i < include_end) {
-                               dprintk(" crush_msr_valid_candidate: "
-                                       "candidate %d already chosen for "
-                                       "stride\n",
-                                       candidate);
-                               return 1;
-                       } else {
-                               dprintk(" crush_msr_valid_candidate: "
-                                       "candidate %d collision\n",
-                                       candidate);
-                               return 0;
-                       }
-               }
-       }
-       dprintk(" crush_msr_valid_candidate: candidate %d no collision\n",
-               candidate);
-       return 1;
-}
-
-/**
- * crush_msr_push_used
- *
- * See crush_msr_choose for details, used to push bucket indices onto collision
- * set for specified stride.  User is responsible for ensuring that
- * [stride_start, stride_end) never holds more than stride_end - stride_start
- * entries.
- * @workspace: holds working space information
- * @stepno: index of step
- * @stride_start: start of stride
- * @stride_end: one past end of stride
- * @candidate: element to add to set
- * @return 1 if added (not already present), 0 if not added due to already
- *           being present
- */
-static int crush_msr_push_used(
-       const struct crush_msr_workspace *workspace,
-       unsigned stepno,
-       unsigned stride_start,
-       unsigned stride_end,
-       int candidate)
-{
-       BUG_ON(stepno >= workspace->end_stepno);
-       BUG_ON(stepno < workspace->start_stepno);
-
-       BUG_ON(stride_end <= stride_start);
-       BUG_ON(stride_end > workspace->result_len);
-       int *vec = workspace->step_vecs[stepno - workspace->start_stepno];
-       for (unsigned i = stride_start; i < stride_end; ++i) {
-               if (vec[i] == candidate) {
-                       return 0;
-               } else if (vec[i] == CRUSH_ITEM_UNDEF) {
-                       vec[i] = candidate;
-                       return 1;
-               }
-       }
-       BUG_ON("impossible");
-       return 0;
-}
-
-/**
- * crush_msr_pop_used
- *
- * See crush_msr_choose for details, used to pop bucket indices from collision
- * set for specified stride.  If an element is to be popped, crush_msr_pop_used
- * must be called prior to pushing another element.
- * @workspace: holds working space information
- * @stepno: index of step
- * @stride_start: start of stride
- * @stride_end: one past end of stride
- * @candidate: element to pop from set
- */
-static void crush_msr_pop_used(
-       const struct crush_msr_workspace *workspace,
-       unsigned stepno,
-       unsigned stride_start,
-       unsigned stride_end,
-       int candidate)
-{
-       BUG_ON(stepno >= workspace->end_stepno);
-       BUG_ON(stepno < workspace->start_stepno);
-
-       BUG_ON(stride_end <= stride_start);
-       BUG_ON(stride_end > workspace->result_len);
-       int *vec = workspace->step_vecs[stepno - workspace->start_stepno];
-       for (unsigned i = stride_end; i > stride_start;) {
-               --i;
-               if (vec[i] != CRUSH_ITEM_UNDEF) {
-                       BUG_ON(vec[i] != candidate);
-                       vec[i] = CRUSH_ITEM_UNDEF;
-                       return;
-               }
-       }
-       BUG_ON(0 == "impossible");
-}
-
-/**
- * crush_msr_emit_result
- *
- * Outputs mapping result from specified position.  Position in output
- * buffer depends on rule type -- FIRSTN outputs in output order, INDEP
- * outputs into specified position.
- * @output: output buffer
- * @rule_type: CRUSH_RULE_TYPE_MSR_FIRSTN or CRUSH_RULE_TYPE_MSR_INDEP
- * @position: mapping position
- * @result: mapping value to output
- */
-static void crush_msr_emit_result(
-       struct crush_msr_output *output,
-       int rule_type,
-       unsigned position,
-       int result)
-{
-       BUG_ON(position >= output->result_len);
-       BUG_ON(output->returned_so_far >= output->result_len);
-       if (rule_type == CRUSH_RULE_TYPE_MSR_FIRSTN) {
-               BUG_ON(output->out[output->returned_so_far] != CRUSH_ITEM_NONE);
-               output->out[(output->returned_so_far)++] = result;
-       } else {
-               BUG_ON(output->out[position] != CRUSH_ITEM_NONE);
-               output->out[position] = result;
-               ++output->returned_so_far;
-       }
-       dprintk(" emit: %d, returned_so_far: %d\n",
-               result, output->returned_so_far);
-}
-
-/**
- * crush_msr_choose
- *
- * Performs mapping for a single EMIT block containing CHOOSE steps
- * [current_stepno, end_stepno) into mapping indices [start_index, end_index).
- *
- * Like chooseleaf, crush_msr_choose is essentially depth-first -- it chooses
- * an item and all of the descendants under that item before moving to the
- * next item.  Each choose step in the block gets its own workspace for
- * collision detection.
- *
- * crush_msr_choose (and its recursive calls) will locally retry any bucket
- * selections that produce a collision (up to msr_collision_tries times), but
- * won't retry if it hits an out osd -- that's handled by calling back into
- * crush_msr_choose up to msr_descents times.
- *
- * @input: crush input information
- * @workspace: working space for this EMIT block
- * @output: crush mapping output buffer specification
- * @total_descendants: total number of descendants implied by the step sequence,
- *                     may be larger than end_index - start_index.
- * @start_index: start mapping index
- * @end_index: end mapping index
- * @current_stepno: first choose step
- * @end_stepno: one past last choose step, must be an EMIT
- * @tryno: try number, see crush_msr_do_rule
- */
-static unsigned crush_msr_choose(
-       const struct crush_msr_input *input,
-       const struct crush_msr_workspace *workspace,
-       struct crush_msr_output *output,
-       const struct crush_bucket *bucket,
-       const unsigned total_descendants,
-       const unsigned start_index, const unsigned end_index,
-       const unsigned current_stepno, const unsigned end_stepno,
-       const unsigned tryno)
-{
-       dprintk("crush_msr_choose: bucket %d, start_index %d, end_index %d\n",
-               bucket->id, start_index, end_index);
-
-       BUG_ON(current_stepno >= input->rule->len);
-       const struct crush_rule_step *curstep =
-               &(input->rule->steps[current_stepno]);
-       BUG_ON(curstep->op != CRUSH_RULE_CHOOSE_MSR);
-
-       /* This call into crush_msr_choose is responsible, ultimately, for
-        * populating indices [start_index, end_index).  We do this by
-        * dividing that range into a set of strides specified in the
-        * step -- choosemsr 4 host would dictate that the range be divided
-        * into 4 strides.
-        *
-        * If the full rule is
-        *
-        * ...
-        * step take root
-        * step choosemsr 4 host
-        * step choosemsr 2 osd
-        * step emit
-        *
-        * total_descendants for the initial call would be 8 (4*2) with
-        * num_stride=4 (4 hosts) and stride_length = 2 (2 osds per host).
-        * For the recursive calls, total_descendants would be 2 (8 / 4),
-        * stride_length would be 1 and num_strides would be 2.
-        */
-
-       // choosemsr 0 host should select result_max hosts
-       const unsigned num_strides = curstep->arg1 ? curstep->arg1
-               : input->result_max;
-
-       // total_descendants is the product of the steps in the block
-       BUG_ON(total_descendants % num_strides != 0);
-       const unsigned stride_length = total_descendants / num_strides;
-
-       /* MSR steps like
-        *
-        * step choosemsr 4 host
-        *
-        * guarantee that the output mapping will be divided into at least
-        * 4 hosts, not exactly 4 hosts.  We achieve this by ensuring that
-        * the sets of hosts for each stride are disjoint -- a host selected
-        * for stride 0 will not be used for any other stride.
-        *
-        * However, a single stride might end up using more than one host.
-        * If an OSD on a host is marked out, crush_msr_choose will simply
-        * skip that index when it hits it.  crush_msr_do_rule will then
-        * call back into crush_msr_choose and eventually find another OSD
-        * either on the same host or on another one not already used in
-        * another stride. For this reason, a single stride may need to
-        * remember up to stride_length entries for collision detection
-        * purposes.
-        *
-        * Unfortunately, we only have stride_length entries to work with
-        * in workspace.  Thus, prior to returning from crush_msr_choose,
-        * we remove entries that didn't actually result in a mapping.  We
-        * use the following undo vector to achieve this -- any strides that
-        * didn't result in a successful mapping are set in undo to be undone
-        * immediately prior to returning.
-        *
-        * Why prior to returning and not immediately?  Selecting a bucket in
-        * a stride impacts subsequent choices as they may have collided.  In
-        * order to limit the impact of marking an OSD out, we treat it as
-        * collidable until the next pass.
-        */
-       int undo[num_strides];
-       for (unsigned stride = 0; stride < num_strides; ++stride) {
-               undo[stride] = CRUSH_ITEM_UNDEF;
-       }
-
-       dprintk("crush_msr_choose: bucket %d, start_index %d, "
-               "end_index %d, stride_length %d\n",
-               bucket->id, start_index, end_index, stride_length);
-
-       unsigned mapped = 0;
-       unsigned stride_index = 0;
-       for (unsigned stride_start = start_index;
-            stride_start < end_index;
-            stride_start += stride_length, ++stride_index) {
-               const unsigned stride_end =
-                 MIN(stride_start + stride_length, end_index);
-    
-               // all descendants for this stride have been mapped already
-               if (crush_msr_leaf_vec_populated(
-                     workspace, stride_start, stride_end)) {
-                 continue;
-               }
-
-               int found = 0;
-               int child_bucket_candidate;
-               for (unsigned local_tryno = 0;
-                    local_tryno <= input->msr_collision_tries;
-                    ++local_tryno) {
-                       child_bucket_candidate = crush_msr_descend(
-                               input, workspace, bucket,
-                               curstep->arg2, tryno, local_tryno,
-                               stride_index);
-
-                       /* candidate is valid if:
-                        * - we already chose it for this stride
-                        * - it hasn't been chosen for any stride */
-                       if (crush_msr_valid_candidate(
-                                   workspace,
-                                   current_stepno,
-                                   // Collision on elements in [start_index, end_index)
-                                   start_index, end_index,
-                                   // ...unless in [stride_start, stride_end)
-                                   stride_start, stride_end,
-                                   child_bucket_candidate)) {
-                               found = 1;
-                               break;
-                       }
-               }
-
-               /* failed to find non-colliding choice after msr_collision_tries
-                * attempts */
-               if (!found) continue;
-
-               if (curstep->arg2 == 0 /* leaf */) {
-                       if (stride_length != 1 ||
-                           (current_stepno + 1 != end_stepno)) {
-                               /* Either condition above implies that there's
-                                * another step after a choosemsr step for the
-                                * leaf type, rule is malformed, bail */
-                               continue;
-                       }
-                       if (is_out(input->map, input->weights,
-                                  input->weight_len,
-                                  child_bucket_candidate, input->map_input)) {
-                               dprintk(" crush_msr_choose: item %d out\n",
-                                       child_bucket_candidate);
-                               /* crush_msr_do_rule will try again,
-                                * msr_descents permitting */
-                               continue;
-                       }
-                       // for collision detection
-                       int pushed = crush_msr_push_used(
-                               workspace, current_stepno, stride_start, stride_end,
-                               child_bucket_candidate);
-                       /* stride_length == 1, can't already be there */
-                       BUG_ON(!pushed);
-                       // final output, ordering depending on input->rule->type
-                       crush_msr_emit_result(
-                               output, input->rule->type,
-                               stride_start, child_bucket_candidate);
-                       mapped++;
-               } else /* not leaf */ {
-                       if (current_stepno + 1 >= end_stepno) {
-                               /* Type isn't leaf, rule is malformed since there
-                                * isn't another step */
-                               continue;
-                       }
-                       struct crush_bucket *child_bucket = input->map->buckets[
-                               -1 - child_bucket_candidate];
-                       unsigned child_mapped = crush_msr_choose(
-                               input, workspace, output,
-                               child_bucket,
-                               // total_descendants for recursive call
-                               stride_length,
-                               // recursive call populates
-                               // [stride_start, stride_end)
-                               stride_start, stride_end,
-                               // next step
-                               current_stepno + 1, end_stepno,
-                               tryno);
-                       int pushed = crush_msr_push_used(
-                               workspace,
-                               current_stepno,
-                               stride_start,
-                               stride_end,
-                               child_bucket_candidate);
-                       /* pushed may be false if we already chose this bucket
-                        * for this stride.  If so, child_mapped must have been
-                        * != 0 at the time, so we still retain it */
-                       if (pushed && (child_mapped == 0)) {
-                               // no child mapped, and we didn't choose it
-                               // before
-                               undo[stride_index] = child_bucket_candidate;
-                       } else {
-                               mapped += child_mapped;
-                       }
-               }
-       }
-
-       // pop unused buckets
-       stride_index = 0;
-       for (unsigned stride_start = start_index;
-            stride_start < end_index;
-            stride_start += stride_length, ++stride_index) {
-               if (undo[stride_index] != CRUSH_ITEM_UNDEF) {
-                       unsigned stride_end =
-                         MIN(stride_start + stride_length, end_index);
-                       crush_msr_pop_used(
-                               workspace,
-                               current_stepno,
-                               stride_start,
-                               stride_end,
-                               undo[stride_index]);
-               }
-       }
-  
-       return mapped;
-}
-
-/**
- * crush_msr_do_rule - calculate a mapping with the given input and msr rule
- *
- * msr_firstn and msr_indep rules are intended to address a limitation of
- * conventional crush rules in that they do not retry steps outside of
- * a CHOOSELEAF step.  In the case of a crush rule like
- *
- * rule replicated_rule_1 {
- *   ...
- *   step take default class hdd
- *   step chooseleaf firstn 3 type host
- *   step emit
- * }
- *
- * the chooseleaf step will ensure that if all of the osds on a
- * particular host are marked out, mappings including those OSDs would
- * end up on another host (provided that there are enough hosts).
- *
- * However, if the rule used two choose steps instead
- *
- * rule replicated_rule_1 {
- *   ...
- *   step take default class hdd
- *   step choose firstn 3 type host
- *   step choose firstn 1 type osd
- *   step emit
- * }
- *
- * marking an OSD down could cause it to be remapped to another on the same
- * host, but not to another host.  If all of the OSDs on a host are marked
- * down, the PGs will simply be degraded and unable to remap until the host
- * is removed from the CRUSH heirarchy or reweighted to 0.
- *
- * Normally, we can comfortably work around this by using a chooseleaf
- * step as in the first example, but there are cases where we want to map
- * multiple OSDs to each host (wide EC codes on small clusters, for
- * example) which can't be handled with chooseleaf as it currently
- * exists.
- *
- * rule ecpool-86 {
- *   type msr_indep
- *   ...
- *   step choosemsr 4 type host
- *   step choosemsr 4 type osd
- *   step emit
- * }
- *
- * With an 8+6 code, this rule can tolerate a host and a single OSD down without
- * becoming unavailable on 4 hosts.  It relies on ensuring that no more than 4
- * OSDs are mapped to any single host, however, which can't be done with a
- * conventional CRUSH rule without the drawback described above.  By using
- * msr_indep, this rule can deal with an OSD failure by remapping to another
- * host.
- *
- * MSR rules have some structural differences from conventional rules:
- * - The rule type determines whether the mapping is FIRSTN or INDEP.  Because
- *   the descent can retry steps, it doesn't really make sense for steps to
- *   individually specify output order and I'm not really aware of any use cases
- *   that would benefit from it.
- * - MSR rules *must* be structured as a (possibly empty) prefix of config
- *   steps (CRUSH_RULE_SET_CHOOSE_MSR*) followed by a sequence of EMIT blocks
- *   each comprised of a TAKE step, a sequence of CHOOSE_MSR steps, and
- *   ended by an EMIT step.
- * - MSR choose steps must be choosemsr.  choose and chooseleaf are not permitted.
- *
- * MSR rules also have different requirements for working space.  Conventional CRUSH
- * requires 3 vectors of size result_max to use for working space -- two to alternate
- * as it processes each rule and one, additionally, for chooseleaf.  MSR rules
- * need N vectors where N is the number of choosemsr in the longest EMIT block since
- * it needs to retain all of the choices made as part of each descent.
- *
- * See crush_msr_choose for details.
- *
- * @map: the crush_map
- * @ruleno: the rule id
- * @x: hash input
- * @result: pointer to result vector
- * @result_max: maximum result size
- * @weight: weight vector (for map leaves)
- * @weight_max: size of weight vector
- * @cwin: Pointer to at least map->working_size bytes of memory or NULL.
- */
-static int crush_msr_do_rule(
-       const struct crush_map *map,
-       int ruleno, int map_input, int *result, int result_max,
-       const __u32 *weight, int weight_max,
-       void *cwin, const struct crush_choose_arg *choose_args)
-{
-       unsigned msr_descents = map->msr_descents;
-       unsigned msr_collision_tries = map->msr_collision_tries;
-       struct crush_rule *rule = map->rules[ruleno];
-       unsigned start_stepno = crush_msr_scan_config_steps(
-               rule->steps, rule->len,
-               &msr_descents, &msr_collision_tries);
-
-       struct crush_msr_input input = {
-               .map = map,
-               .rule = map->rules[ruleno],
-               .result_max = result_max,
-               .weight_len = weight_max,
-               .weights = weight,
-               .map_input = map_input,
-               .choose_args = choose_args,
-               .msr_descents = msr_descents,
-               .msr_collision_tries = msr_collision_tries 
-       };
-
-       struct crush_msr_output output = {
-               .result_len = result_max,
-               .returned_so_far = 0,
-               .out = result
-       };
-       for (unsigned i = 0; i < output.result_len; ++i) {
-               output.out[i] = CRUSH_ITEM_NONE;
-       }
-
-       unsigned start_index = 0;
-       while (start_stepno < input.rule->len) {
-               unsigned emit_stepno, total_children;
-               if (crush_msr_scan_next(
-                           input.rule, input.result_max,
-                           start_stepno, &total_children,
-                           &emit_stepno) != 0) {
-                       // invalid rule, return whatever we have
-                       dprintk("crush_msr_scan_returned -1\n");
-                       return 0;
-               }
-
-               const struct crush_rule_step *take_step =
-                       &(input.rule->steps[start_stepno]);
-               BUG_ON(take_step->op != CRUSH_RULE_TAKE);
-
-               if (take_step->arg1 >= 0) {
-                       if (start_stepno + 1 != emit_stepno) {
-                               // invalid rule
-                               dprintk("take step specifies osd, but "
-                                       "there are subsequent choose steps\n");
-                               return 0;
-                       } else {
-                               crush_msr_emit_result(
-                                       &output, input.rule->type,
-                                       start_index, take_step->arg1);
-                       }
-               } else {
-                       dprintk("start_stepno %d\n", start_stepno);
-                       dprintk("root bucket: %d\n",
-                               input.rule->steps[start_stepno].arg1);
-                       struct crush_bucket *root_bucket = input.map->buckets[
-                               -1 - input.rule->steps[start_stepno].arg1];
-                       dprintk(
-                               "root bucket: %d %p\n",
-                               input.rule->steps[start_stepno].arg1,
-                               root_bucket);
-
-                       ++start_stepno;
-                       BUG_ON(emit_stepno >= input.rule->len);
-                       BUG_ON(emit_stepno < start_stepno);
-                       BUG_ON(start_stepno >= input.rule->len);
-
-                       struct crush_work *cw = cwin;
-                       int *out_vecs[input.rule->len];
-                       for (unsigned stepno = 0; stepno < input.rule->len; ++stepno) {
-                               out_vecs[stepno] = (int*)((char*)cw + map->working_size) +
-                                       (stepno * result_max);
-                       }
-                       struct crush_msr_workspace workspace = {
-                               .start_stepno = start_stepno,
-                               .end_stepno = emit_stepno,
-                               .result_len = result_max,
-                               .crush_work = cw,
-                               .step_vecs = out_vecs
-                       };
-                       crush_msr_clear_workspace(&workspace);
-
-
-                       unsigned tries_so_far = 0;
-                       unsigned end_index = MIN(start_index + total_children,
-                                                input.result_max);
-                       while (tries_so_far <= input.msr_descents &&
-                              output.returned_so_far < input.result_max) {
-                               crush_msr_choose(
-                                       &input, &workspace, &output,
-                                       root_bucket,
-                                       total_children,
-                                       start_index,
-                                       end_index,
-                                       start_stepno, emit_stepno,
-                                       tries_so_far);
-                               dprintk("returned_so_far: %d\n",
-                                       output.returned_so_far);
-                               ++tries_so_far;
-                       }
-                       start_index = end_index;
-                       start_stepno = emit_stepno + 1;
-               }
-       }
-
-       if (rule->type == CRUSH_RULE_TYPE_MSR_FIRSTN) {
-         return output.returned_so_far;
-       } else {
-         return input.result_max;
-       }
-}
-
-/// Return 1 if msr, 0 otherwise
-static int rule_type_is_msr(int type)
-{
-       return type == CRUSH_RULE_TYPE_MSR_FIRSTN ||
-               type == CRUSH_RULE_TYPE_MSR_INDEP;
-}
-
-size_t crush_work_size(const struct crush_map *map,
-                      int result_max)
-{
-       unsigned ruleno;
-       unsigned out_vecs = 3; /* normal do_rule needs 3 outvecs */
-       for (ruleno = 0; ruleno < map->max_rules; ++ruleno) {
-               const struct crush_rule *rule = map->rules[ruleno];
-               if (!rule) continue;
-               if (!rule_type_is_msr(rule->type))
-                       continue;
-               unsigned rule_max_msr_steps;
-               // we ignore the return value because rule_max_msr_steps will be
-               // populated with the longest step sequence before hitting
-               // the error
-               crush_msr_scan_rule(rule, result_max, &rule_max_msr_steps);
-               out_vecs = MAX(rule_max_msr_steps, out_vecs);
-       }
-       return map->working_size + result_max * out_vecs * sizeof(__u32);
-}
-
-/* This takes a chunk of memory and sets it up to be a shiny new
-   working area for a CRUSH placement computation. It must be called
-   on any newly allocated memory before passing it in to
-   crush_do_rule. It may be used repeatedly after that, so long as the
-   map has not changed. If the map /has/ changed, you must make sure
-   the working size is no smaller than what was allocated and re-run
-   crush_init_workspace.
-
-   If you do retain the working space between calls to crush, make it
-   thread-local. If you reinstitute the locking I've spent so much
-   time getting rid of, I will be very unhappy with you. */
-
-void crush_init_workspace(const struct crush_map *m, void *v) {
-       /* We work by moving through the available space and setting
-          values and pointers as we go.
-
-          It's a bit like Forth's use of the 'allot' word since we
-          set the pointer first and then reserve the space for it to
-          point to by incrementing the point. */
-       struct crush_work *w = (struct crush_work *)v;
-       char *point = (char *)v;
-       __s32 b;
-       point += sizeof(struct crush_work);
-       w->work = (struct crush_work_bucket **)point;
-       point += m->max_buckets * sizeof(struct crush_work_bucket *);
-       for (b = 0; b < m->max_buckets; ++b) {
-               if (m->buckets[b] == 0)
-                       continue;
-
-               w->work[b] = (struct crush_work_bucket *) point;
-               switch (m->buckets[b]->alg) {
-               default:
-                       point += sizeof(struct crush_work_bucket);
-                       break;
-               }
-               w->work[b]->perm_x = 0;
-               w->work[b]->perm_n = 0;
-               w->work[b]->perm = (__u32 *)point;
-               point += m->buckets[b]->size * sizeof(__u32);
-       }
-       BUG_ON((char *)point - (char *)w != m->working_size);
-}
-
-/**
- * crush_do_rule - calculate a mapping with the given input and rule
- * @map: the crush_map
- * @ruleno: the rule id
- * @x: hash input
- * @result: pointer to result vector
- * @result_max: maximum result size
- * @weight: weight vector (for map leaves)
- * @weight_max: size of weight vector
- * @cwin: Pointer to at least map->working_size bytes of memory or NULL.
- */
-int crush_do_rule(const struct crush_map *map,
-                 int ruleno, int x, int *result, int result_max,
-                 const __u32 *weight, int weight_max,
-                 void *cwin, const struct crush_choose_arg *choose_args)
-{
-       const struct crush_rule *rule;
-
-       if ((__u32)ruleno >= map->max_rules) {
-               dprintk(" bad ruleno %d\n", ruleno);
-               return 0;
-       }
-
-       rule = map->rules[ruleno];
-       if (rule_type_is_msr(rule->type)) {
-               return crush_msr_do_rule(
-                       map,
-                       ruleno,
-                       x,
-                       result,
-                       result_max,
-                       weight,
-                       weight_max,
-                       cwin,
-                       choose_args);
-       } else {
-               return crush_do_rule_no_retry(
-                       map,
-                       ruleno,
-                       x,
-                       result,
-                       result_max,
-                       weight,
-                       weight_max,
-                       cwin,
-                       choose_args);
-       }
-}
index 98c7bf11c0d990cb35b043a858a20b2c8c0c52e7..0ec927d9e6162e3d5b1a3b4cc2e539c7b028f065 100644 (file)
@@ -77,11 +77,15 @@ extern int crush_do_rule(const struct crush_map *map,
                         const __u32 *weights, int weight_max,
                         void *cwin, const struct crush_choose_arg *choose_args);
 
-/* Returns enough workspace for any crush rule within map to generate
-   result_max outputs. The caller can then allocate this much on its own,
-   either on the stack, in a per-thread long-lived buffer, or however it likes.*/
-extern size_t crush_work_size(const struct crush_map *map,
-                             int result_max);
+/* Returns the exact amount of workspace that will need to be used
+   for a given combination of crush_map and result_max. The caller can
+   then allocate this much on its own, either on the stack, in a
+   per-thread long-lived buffer, or however it likes. */
+
+static inline size_t crush_work_size(const struct crush_map *map,
+                                    int result_max) {
+       return map->working_size + result_max * 3 * sizeof(__u32);
+}
 
 extern void crush_init_workspace(const struct crush_map *m, void *v);
 
index 928d05f2adb0aada951d72829ad59b9f61c72c3f..5212baee25187072512da7bd137ae640cf92bf96 100644 (file)
@@ -52,12 +52,6 @@ int ErasureCode::init(
   err |= to_string("crush-failure-domain", profile,
                   &rule_failure_domain,
                   DEFAULT_RULE_FAILURE_DOMAIN, ss);
-  err |= to_int("crush-osds-per-failure-domain", profile,
-               &rule_osds_per_failure_domain,
-               "0", ss);
-  err |= to_int("crush-num-failure-domains", profile,
-               &rule_num_failure_domains,
-               "0", ss);
   err |= to_string("crush-device-class", profile,
                   &rule_device_class,
                   "", ss);
@@ -72,33 +66,19 @@ int ErasureCode::create_rule(
   CrushWrapper &crush,
   std::ostream *ss) const
 {
-  if (rule_osds_per_failure_domain <= 1) {
-    return crush.add_simple_rule(
-      name,
-      rule_root,
-      rule_failure_domain,
-      rule_num_failure_domains,
-      rule_device_class,
-      "indep",
-      pg_pool_t::TYPE_ERASURE,
-      ss);
-  } else {
-    if (rule_num_failure_domains < 1)  {
-      if (ss) {
-       *ss << "crush-num-failure-domains " << rule_num_failure_domains 
-           << " must be >= 1 if crush-osds-per-failure-domain specified";
-       return -EINVAL;
-      }
-    }
-    return crush.add_indep_multi_osd_per_failure_domain_rule(
-      name,
-      rule_root,
-      rule_failure_domain,
-      rule_num_failure_domains,
-      rule_osds_per_failure_domain,
-      rule_device_class,
-      ss);
-  }
+  int ruleid = crush.add_simple_rule(
+    name,
+    rule_root,
+    rule_failure_domain,
+    rule_device_class,
+    "indep",
+    pg_pool_t::TYPE_ERASURE,
+    ss);
+
+  if (ruleid < 0)
+    return ruleid;
+
+  return ruleid;
 }
 
 int ErasureCode::sanity_check_k_m(int k, int m, ostream *ss)
index fd6d1a41f714dd131ff838e440b5cc02cc5fd6e2..c246d5dc6b67d9a2d865f6876be53ed2d2ad0390 100644 (file)
@@ -37,8 +37,6 @@ namespace ceph {
     std::string rule_root;
     std::string rule_failure_domain;
     std::string rule_device_class;
-    int rule_osds_per_failure_domain = -1;
-    int rule_num_failure_domains = -1;
 
     ~ErasureCode() override {}
 
index 23175adfa2c8050277e4ba4cffedd43444e90d89..1937eeb4c6987a8bffee5d335975d27dee113991 100644 (file)
@@ -137,7 +137,7 @@ DEFINE_CEPH_FEATURE(34, 3, RANGE_BLOCKLIST)
 DEFINE_CEPH_FEATURE(35, 1, OSD_CACHEPOOL)    // 3.14
 DEFINE_CEPH_FEATURE(36, 1, CRUSH_V2)         // 3.14
 DEFINE_CEPH_FEATURE(37, 1, EXPORT_PEER)      // 3.14
-DEFINE_CEPH_FEATURE(38, 2, CRUSH_MSR)        // X.XX TODOSAM kernel version?
+DEFINE_CEPH_FEATURE_RETIRED(38, 1, OSD_ERASURE_CODES, MIMIC, OCTOPUS)
 // available
 DEFINE_CEPH_FEATURE(39, 1, OSDMAP_ENC)       // 3.15
 DEFINE_CEPH_FEATURE(40, 1, MDS_INLINE_DATA)  // 3.19
@@ -218,7 +218,6 @@ DEFINE_CEPH_FEATURE_RETIRED(63, 1, RESERVED_BROKEN, LUMINOUS, QUINCY) // client-
         CEPH_FEATURE_OSD_CACHEPOOL |       \
         CEPH_FEATURE_CRUSH_V2 |            \
         CEPH_FEATURE_EXPORT_PEER |         \
-        CEPH_FEATURE_CRUSH_MSR |           \
         CEPH_FEATURE_OSDMAP_ENC |          \
         CEPH_FEATURE_MDS_INLINE_DATA |     \
         CEPH_FEATURE_CRUSH_TUNABLES3 |     \
@@ -266,10 +265,9 @@ DEFINE_CEPH_FEATURE_RETIRED(63, 1, RESERVED_BROKEN, LUMINOUS, QUINCY) // client-
         CEPH_FEATURE_CRUSH_TUNABLES2 |         \
         CEPH_FEATURE_CRUSH_TUNABLES3 |         \
         CEPH_FEATURE_CRUSH_TUNABLES5 |         \
-        CEPH_FEATURE_CRUSH_MSR |               \
         CEPH_FEATURE_CRUSH_V2 |                \
         CEPH_FEATURE_CRUSH_V4 |                \
-        CEPH_FEATUREMASK_CRUSH_MSR)
+        CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS)
 
 /*
  * make sure we don't try to use the reserved features
index 696d7f3185b364e5db0ef5cc001bccecbade0814..f8e379326f25f4439e685e75cc18fb391c08ab30 100644 (file)
@@ -7562,12 +7562,6 @@ bool OSDMonitor::validate_crush_against_features(const CrushWrapper *newcrush,
         << newmap.require_min_compat_client;
       return false;
     }
-    if (mv > newmap.require_osd_release) {
-      ss << "new crush map requires client version " << mv
-        << " but require_osd_release is "
-        << newmap.require_osd_release;
-      return false;
-    }
   }
 
   // osd compat
@@ -8078,7 +8072,7 @@ int OSDMonitor::prepare_new_pool(string& name,
     return r;
   }
 
-  if (!osdmap.crush->rule_valid_for_pool_type(crush_rule, pool_type)) {
+  if (osdmap.crush->get_rule_type(crush_rule) != (int)pool_type) {
     *ss << "crush rule " << crush_rule << " type does not match pool";
     return -EINVAL;
   }
@@ -8350,7 +8344,7 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
        return -EPERM;
       }
     }
-    if (!osdmap.crush->rule_valid_for_pool_type(p.get_crush_rule(), p.type)) {
+    if (osdmap.crush->get_rule_type(p.get_crush_rule()) != (int)p.type) {
       ss << "crush rule " << p.get_crush_rule() << " type does not match pool";
       return -EINVAL;
     }
@@ -8583,7 +8577,7 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
       ss << cpp_strerror(id);
       return -ENOENT;
     }
-    if (!osdmap.crush->rule_valid_for_pool_type(id, p.get_type())) {
+    if (osdmap.crush->get_rule_type(id) != (int)p.get_type()) {
       ss << "crush rule " << id << " type does not match pool";
       return -EINVAL;
     }
index 76552333dfff00b22e64952fa6590a097bdd5fd0..5773695b77ad30dcdfb8ba22aa6740e576d9f597 100644 (file)
@@ -1764,10 +1764,9 @@ uint64_t OSDMap::get_features(int entity_type, uint64_t *pmask) const
     features |= CEPH_FEATURE_CRUSH_V4;
   if (crush->has_nondefault_tunables5())
     features |= CEPH_FEATURE_CRUSH_TUNABLES5;
-  if (crush->has_incompat_choose_args())
+  if (crush->has_incompat_choose_args()) {
     features |= CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS;
-  if (crush->has_nondefault_tunables_msr())
-    features |= CEPH_FEATURE_CRUSH_MSR;
+  }
   mask |= CEPH_FEATURES_CRUSH;
 
   if (!pg_upmap.empty() || !pg_upmap_items.empty() || !pg_upmap_primaries.empty())
@@ -1790,8 +1789,6 @@ uint64_t OSDMap::get_features(int entity_type, uint64_t *pmask) const
        features |= CEPH_FEATURE_CRUSH_TUNABLES3;
       if (crush->is_v5_rule(ruleid))
        features |= CEPH_FEATURE_CRUSH_TUNABLES5;
-      if (crush->is_msr_rule(ruleid))
-       features |= CEPH_FEATURE_CRUSH_MSR;
     }
   }
   mask |= CEPH_FEATURE_OSDHASHPSPOOL | CEPH_FEATURE_OSD_CACHEPOOL;
@@ -1846,9 +1843,6 @@ ceph_release_t OSDMap::get_min_compat_client() const
 {
   uint64_t f = get_features(CEPH_ENTITY_TYPE_CLIENT, nullptr);
 
-  if (HAVE_FEATURE(f, CRUSH_MSR)) {      // TODOSAM -- add version right before merge
-    return ceph_release_t::squid;        // v19.2.0
-  }
   if (HAVE_FEATURE(f, OSDMAP_PG_UPMAP) ||      // v12.0.0-1733-g27d6f43
       HAVE_FEATURE(f, CRUSH_CHOOSE_ARGS)) {    // v12.0.1-2172-gef1ef28
     return ceph_release_t::luminous;  // v12.2.0
@@ -4530,7 +4524,7 @@ int OSDMap::validate_crush_rules(CrushWrapper *newcrush,
          << " but it is not present";
       return -EINVAL;
     }
-    if (!newcrush->rule_valid_for_pool_type(ruleno, pool.get_type())) {
+    if (newcrush->get_rule_type(ruleno) != (int)pool.get_type()) {
       *ss << "pool " << i.first << " type does not match rule " << ruleno;
       return -EINVAL;
     }
index 99120f0f211fe163612bcb201e85111d6c41030c..e0956ec0a754dced5ab32dd8a67357c81a07049b 100644 (file)
           "chooseleaf_descend_once": 0,
           "chooseleaf_vary_r": 0,
           "chooseleaf_stable": 0,
-          "msr_descents": 100,
-          "msr_collision_tries": 100,
           "straw_calc_version": 0,
           "allowed_bucket_algs": 22,
           "profile": "argonaut",
           "has_v3_rules": 0,
           "has_v4_buckets": 1,
           "require_feature_tunables5": 0,
-          "has_v5_rules": 0,
-          "has_msr_rules": 0
+          "has_v5_rules": 0
       },
       "choose_args": {
           "1": [],
index 695c4e0c22d9d1386886f1722b127287ddaab615..520f11e50d5cf83f381108da7786071c8a185e51 100644 (file)
@@ -6,7 +6,7 @@
   osdmaptool: exported crush map to oc
   $ osdmaptool --import-crush oc myosdmap
   osdmaptool: osdmap file 'myosdmap'
-  osdmaptool: imported 499 byte crush map from oc
+  osdmaptool: imported 497 byte crush map from oc
   osdmaptool: writing epoch 3 to myosdmap
   $ osdmaptool --adjust-crush-weight 0:5 myosdmap
   osdmaptool: osdmap file 'myosdmap'
index 9e2a2c99fd224da80dfb020e42bac360f388fbd5..1f53084a70afbc707f0158874a13d10affbb6e9f 100644 (file)
 #include "include/stringify.h"
 
 #include "crush/CrushWrapper.h"
-#include "crush/CrushCompiler.h"
 #include "osd/osd_types.h"
 
 using namespace std;
 
+std::unique_ptr<CrushWrapper> build_indep_map(CephContext *cct, int num_rack,
+                              int num_host, int num_osd)
+{
+  std::unique_ptr<CrushWrapper> c(new CrushWrapper);
+  c->create();
+
+  c->set_type_name(5, "root");
+  c->set_type_name(4, "row");
+  c->set_type_name(3, "rack");
+  c->set_type_name(2, "chasis");
+  c->set_type_name(1, "host");
+  c->set_type_name(0, "osd");
+
+  int rootno;
+  c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
+               5, 0, NULL, NULL, &rootno);
+  c->set_item_name(rootno, "default");
+
+  map<string,string> loc;
+  loc["root"] = "default";
+
+  int osd = 0;
+  for (int r=0; r<num_rack; ++r) {
+    loc["rack"] = string("rack-") + stringify(r);
+    for (int h=0; h<num_host; ++h) {
+      loc["host"] = string("host-") + stringify(r) + string("-") + stringify(h);
+      for (int o=0; o<num_osd; ++o, ++osd) {
+       c->insert_item(cct, osd, 1.0, string("osd.") + stringify(osd), loc);
+      }
+    }
+  }
+  int ret;
+  int ruleno = 0;
+  ret = c->add_rule(ruleno, 4, 123);
+  ceph_assert(ret == ruleno);
+  ret = c->set_rule_step(ruleno, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0);
+  ceph_assert(ret == 0);
+  ret = c->set_rule_step(ruleno, 1, CRUSH_RULE_TAKE, rootno, 0);
+  ceph_assert(ret == 0);
+  ret = c->set_rule_step(ruleno, 2, CRUSH_RULE_CHOOSELEAF_INDEP, CRUSH_CHOOSE_N, 1);
+  ceph_assert(ret == 0);
+  ret = c->set_rule_step(ruleno, 3, CRUSH_RULE_EMIT, 0, 0);
+  ceph_assert(ret == 0);
+  c->set_rule_name(ruleno, "data");
+
+  c->finalize();
+
+  if (false) {
+    Formatter *f = Formatter::create("json-pretty");
+    f->open_object_section("crush_map");
+    c->dump(f);
+    f->close_section();
+    f->flush(cout);
+    delete f;
+  }
+
+  return c;
+}
+
 int get_num_dups(const vector<int>& v)
 {
   std::set<int> s;
@@ -36,21 +94,7 @@ int get_num_dups(const vector<int>& v)
   return dups;
 }
 
-class RuleType {
-  bool msr;
-
-public:
-  RuleType(bool msr) : msr(msr) {}
-
-  bool is_msr() const { return msr; }
-  
-  friend std::ostream &operator<<(std::ostream &, RuleType);
-};
-std::ostream &operator<<(std::ostream &lhs, RuleType rhs) {
-  return lhs << (rhs.msr ? "MSR" : "NORMAL");
-}
-
-class IndepTest : public ::testing::TestWithParam<RuleType>
+class CRUSHTest : public ::testing::Test
 {
 public:
   void SetUp() final
@@ -64,91 +108,11 @@ public:
     cct->put();
     cct = nullptr;
   }
-
-  std::unique_ptr<CrushWrapper> build_indep_map(
-    CephContext *cct, int num_rack, int num_host, int num_osd)
-  {
-    std::unique_ptr<CrushWrapper> c(new CrushWrapper);
-    c->create();
-    c->set_tunables_optimal();
-
-    c->set_type_name(5, "root");
-    c->set_type_name(4, "row");
-    c->set_type_name(3, "rack");
-    c->set_type_name(2, "chasis");
-    c->set_type_name(1, "host");
-    c->set_type_name(0, "osd");
-
-    int rootno;
-    c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
-                 5, 0, NULL, NULL, &rootno);
-    c->set_item_name(rootno, "default");
-
-    map<string,string> loc;
-    loc["root"] = "default";
-
-    int osd = 0;
-    for (int r=0; r<num_rack; ++r) {
-      loc["rack"] = string("rack-") + stringify(r);
-      for (int h=0; h<num_host; ++h) {
-       loc["host"] = string("host-") + stringify(r) + string("-") + stringify(h);
-       for (int o=0; o<num_osd; ++o, ++osd) {
-         c->insert_item(cct, osd, 1.0, string("osd.") + stringify(osd), loc);
-       }
-      }
-    }
-    int ret;
-    int ruleno = 0;
-
-    if (GetParam().is_msr()) {
-      unsigned step_id = 0;
-      ret = c->add_rule(ruleno, 4, CRUSH_RULE_TYPE_MSR_INDEP);
-      ceph_assert(ret == ruleno);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_TAKE, rootno, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(
-       ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, CRUSH_CHOOSE_N, 1);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 1, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_EMIT, 0, 0);
-      ceph_assert(ret == 0);
-    } else {
-      unsigned step_id = 0;
-      ret = c->add_rule(ruleno, 4, CRUSH_RULE_TYPE_ERASURE);
-      ceph_assert(ret == ruleno);
-      ret = c->set_rule_step(
-       ruleno, step_id++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_TAKE, rootno, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(
-       ruleno, step_id++, CRUSH_RULE_CHOOSELEAF_INDEP, CRUSH_CHOOSE_N, 1);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_EMIT, 0, 0);
-      ceph_assert(ret == 0);
-    }
-
-    c->set_rule_name(ruleno, "data");
-    c->finalize();
-
-    if (false) {
-      Formatter *f = Formatter::create("json-pretty");
-      f->open_object_section("crush_map");
-      c->dump(f);
-      f->close_section();
-      f->flush(cout);
-      delete f;
-    }
-
-    return c;
-  }
-
 protected:
   CephContext *cct = nullptr;
 };
 
-TEST_P(IndepTest, toosmall) {
+TEST_F(CRUSHTest, indep_toosmall) {
   std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 1, 3, 1));
   vector<__u32> weight(c->get_max_devices(), 0x10000);
   c->dump_tree(&cout, NULL);
@@ -167,7 +131,7 @@ TEST_P(IndepTest, toosmall) {
   }
 }
 
-TEST_P(IndepTest, basic) {
+TEST_F(CRUSHTest, indep_basic) {
   std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
   vector<__u32> weight(c->get_max_devices(), 0x10000);
   c->dump_tree(&cout, NULL);
@@ -186,88 +150,7 @@ TEST_P(IndepTest, basic) {
   }
 }
 
-TEST_P(IndepTest, single_out_first) {
-  std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
-  c->dump_tree(&cout, NULL);
-
-  for (int x = 0; x < 1000; ++x) {
-    vector<__u32> weight(c->get_max_devices(), 0x10000);
-    vector<int> out;
-    c->do_rule(0, x, out, 5, weight, 0);
-
-    int num_none = 0;
-    for (unsigned i=0; i<out.size(); ++i) {
-      if (out[i] == CRUSH_ITEM_NONE)
-       num_none++;
-    }
-    ASSERT_EQ(0, num_none);
-    ASSERT_EQ(0, get_num_dups(out));
-
-    // mark first osd out
-    weight[out[0]] = 0;
-
-    vector<int> out2;
-    c->do_rule(0, x, out2, 5, weight, 0);
-
-    cout << "input " << x
-        << " marked out " << out[0]
-        << " out " << out
-        << " -> out2 " << out2
-        << std::endl;
-
-    // First item should have been remapped
-    ASSERT_NE(CRUSH_ITEM_NONE, out2[0]);
-    ASSERT_NE(out[0], out2[0]);
-    for (unsigned i=1; i<out.size(); ++i) {
-      // but none of the others
-      ASSERT_EQ(out[i], out2[i]);
-    }
-    ASSERT_EQ(0, get_num_dups(out2));
-  }
-}
-
-TEST_P(IndepTest, single_out_last) {
-  std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
-  c->dump_tree(&cout, NULL);
-
-  for (int x = 0; x < 1000; ++x) {
-    vector<__u32> weight(c->get_max_devices(), 0x10000);
-    vector<int> out;
-    c->do_rule(0, x, out, 5, weight, 0);
-
-    int num_none = 0;
-    for (unsigned i=0; i<out.size(); ++i) {
-      if (out[i] == CRUSH_ITEM_NONE)
-       num_none++;
-    }
-    ASSERT_EQ(0, num_none);
-    ASSERT_EQ(0, get_num_dups(out));
-
-    // mark first osd out
-    unsigned last = out.size() - 1;
-    weight[out[last]] = 0;
-
-    vector<int> out2;
-    c->do_rule(0, x, out2, 5, weight, 0);
-
-    cout << "input " << x
-        << " marked out " << out[0]
-        << " out " << out
-        << " -> out2 " << out2
-        << std::endl;
-
-    // Last
-    ASSERT_NE(CRUSH_ITEM_NONE, out2[last]);
-    ASSERT_NE(out[last], out2[last]);
-    for (unsigned i=0; i<last; ++i) {
-      // but none of the others
-      ASSERT_EQ(out[i], out2[i]);
-    }
-    ASSERT_EQ(0, get_num_dups(out2));
-  }
-}
-
-TEST_P(IndepTest, out_alt) {
+TEST_F(CRUSHTest, indep_out_alt) {
   std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
   vector<__u32> weight(c->get_max_devices(), 0x10000);
 
@@ -293,7 +176,7 @@ TEST_P(IndepTest, out_alt) {
   }
 }
 
-TEST_P(IndepTest, out_contig) {
+TEST_F(CRUSHTest, indep_out_contig) {
   std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
   vector<__u32> weight(c->get_max_devices(), 0x10000);
 
@@ -318,7 +201,8 @@ TEST_P(IndepTest, out_contig) {
   }
 }
 
-TEST_P(IndepTest, out_progressive) {
+
+TEST_F(CRUSHTest, indep_out_progressive) {
   std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
   c->set_choose_total_tries(100);
   vector<__u32> tweight(c->get_max_devices(), 0x10000);
@@ -333,15 +217,8 @@ TEST_P(IndepTest, out_progressive) {
     for (unsigned i=0; i<weight.size(); ++i) {
       vector<int> out;
       c->do_rule(0, x, out, 7, weight, 0);
-      cout << "(" << i << "/" << weight.size() << " out) ";
-      if (i > 0) cout << "marked out " << i - 1 << " ";
-      cout << x << " -> " << out << std::endl;
-
-      int num_none = 0;
-      for (unsigned k=0; k<out.size(); ++k) {
-       if (out[k] == CRUSH_ITEM_NONE)
-         num_none++;
-      }
+      cout << "(" << i << "/" << weight.size() << " out) "
+          << x << " -> " << out << std::endl;
       ASSERT_EQ(0, get_num_dups(out));
 
       // make sure nothing moved
@@ -361,6 +238,7 @@ TEST_P(IndepTest, out_progressive) {
            cout << " " << out[j] << " moved from " << pos[out[j]] << " to " << j << std::endl;
            ++moved;
          }
+         //ASSERT_EQ(j, pos[out[j]]);
        }
       }
       if (moved || changed)
@@ -382,334 +260,6 @@ TEST_P(IndepTest, out_progressive) {
 
 }
 
-INSTANTIATE_TEST_SUITE_P(
-  IndepTest,
-  IndepTest,
-  ::testing::Values(RuleType(true), RuleType(false)),
-  testing::PrintToStringParamName());
-
-class FirstnTest : public ::testing::TestWithParam<RuleType>
-{
-public:
-  void SetUp() final
-  {
-    CephInitParameters params(CEPH_ENTITY_TYPE_CLIENT);
-    cct = common_preinit(params, CODE_ENVIRONMENT_UTILITY,
-                        CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
-  }
-  void TearDown() final
-  {
-    cct->put();
-    cct = nullptr;
-  }
-
-  std::unique_ptr<CrushWrapper> build_firstn_map(
-    CephContext *cct, int num_rack, int num_host, int num_osd)
-  {
-    std::unique_ptr<CrushWrapper> c(new CrushWrapper);
-    c->create();
-    c->set_tunables_optimal();
-
-    c->set_type_name(5, "root");
-    c->set_type_name(4, "row");
-    c->set_type_name(3, "rack");
-    c->set_type_name(2, "chasis");
-    c->set_type_name(1, "host");
-    c->set_type_name(0, "osd");
-
-    int rootno;
-    c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
-                 5, 0, NULL, NULL, &rootno);
-    c->set_item_name(rootno, "default");
-
-    map<string,string> loc;
-    loc["root"] = "default";
-
-    int osd = 0;
-    for (int r=0; r<num_rack; ++r) {
-      loc["rack"] = string("rack-") + stringify(r);
-      for (int h=0; h<num_host; ++h) {
-       loc["host"] = string("host-") + stringify(r) + string("-") + stringify(h);
-       for (int o=0; o<num_osd; ++o, ++osd) {
-         c->insert_item(cct, osd, 1.0, string("osd.") + stringify(osd), loc);
-       }
-      }
-    }
-    int ret;
-    int ruleno = 0;
-
-    if (GetParam().is_msr()) {
-      unsigned step_id = 0;
-      ret = c->add_rule(ruleno, 4, CRUSH_RULE_TYPE_MSR_FIRSTN);
-      ceph_assert(ret == ruleno);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_TAKE, rootno, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(
-       ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, CRUSH_CHOOSE_N, 1);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 1, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_EMIT, 0, 0);
-      ceph_assert(ret == 0);
-    } else {
-      unsigned step_id = 0;
-      ret = c->add_rule(ruleno, 4, CRUSH_RULE_TYPE_ERASURE);
-      ceph_assert(ret == ruleno);
-      ret = c->set_rule_step(
-       ruleno, step_id++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 0, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_TAKE, rootno, 0);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(
-       ruleno, step_id++, CRUSH_RULE_CHOOSELEAF_FIRSTN, CRUSH_CHOOSE_N, 1);
-      ceph_assert(ret == 0);
-      ret = c->set_rule_step(ruleno, step_id++, CRUSH_RULE_EMIT, 0, 0);
-      ceph_assert(ret == 0);
-    }
-
-    c->set_rule_name(ruleno, "data");
-    c->finalize();
-
-    if (false) {
-      Formatter *f = Formatter::create("json-pretty");
-      f->open_object_section("crush_map");
-      c->dump(f);
-      f->close_section();
-      f->flush(cout);
-      delete f;
-    }
-
-    return c;
-  }
-
-protected:
-  CephContext *cct = nullptr;
-};
-
-TEST_P(FirstnTest, basic) {
-  std::unique_ptr<CrushWrapper> c(build_firstn_map(cct, 3, 3, 3));
-  vector<__u32> weight(c->get_max_devices(), 0x10000);
-  c->dump_tree(&cout, NULL);
-
-  for (int x = 0; x < 100; ++x) {
-    vector<int> out;
-    c->do_rule(0, x, out, 3, weight, 0);
-    cout << x << " -> " << out << std::endl;
-    for (unsigned i=0; i<out.size(); ++i) {
-      EXPECT_NE(out[i], CRUSH_ITEM_NONE);
-    }
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(0, get_num_dups(out));
-  }
-}
-
-TEST_P(FirstnTest, toosmall) {
-  std::unique_ptr<CrushWrapper> c(build_firstn_map(cct, 1, 3, 1));
-  vector<__u32> weight(c->get_max_devices(), 0x10000);
-  c->dump_tree(&cout, NULL);
-
-  for (int x = 0; x < 100; ++x) {
-    vector<int> out;
-    c->do_rule(0, x, out, 5, weight, 0);
-    cout << x << " -> " << out << std::endl;
-    for (unsigned i=0; i<out.size(); ++i) {
-      EXPECT_NE(out[i], CRUSH_ITEM_NONE);
-    }
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(0, get_num_dups(out));
-  }
-}
-
-TEST_P(FirstnTest, single_out_first) {
-  std::unique_ptr<CrushWrapper> c(build_firstn_map(cct, 3, 3, 3));
-  c->dump_tree(&cout, NULL);
-
-  for (int x = 0; x < 1000; ++x) {
-    vector<__u32> weight(c->get_max_devices(), 0x10000);
-    vector<int> out;
-    c->do_rule(0, x, out, 3, weight, 0);
-
-    for (unsigned i=0; i<out.size(); ++i) {
-      EXPECT_NE(out[i], CRUSH_ITEM_NONE);
-    }
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(0, get_num_dups(out));
-
-    // mark first osd out
-    weight[out[0]] = 0;
-
-    vector<int> out2;
-    c->do_rule(0, x, out2, 3, weight, 0);
-
-    cout << "input " << x
-        << " marked out " << out[0]
-        << " out " << out
-        << " -> out2 " << out2
-        << std::endl;
-
-    ASSERT_EQ(3, out2.size());
-    ASSERT_EQ(0, get_num_dups(out2));
-    for (unsigned i=0; i<out2.size(); ++i) {
-      EXPECT_NE(out2[i], out[0]);
-    }
-    if (GetParam().is_msr()) {
-      // normal crush doesn't guarantee this reliably
-      ASSERT_EQ(out2[0], out[1]);
-      ASSERT_EQ(out2[1], out[2]);
-      ASSERT_NE(out2[2], out[0]);
-    }
-  }
-}
-
-TEST_P(FirstnTest, single_out_last) {
-  std::unique_ptr<CrushWrapper> c(build_firstn_map(cct, 3, 3, 3));
-  c->dump_tree(&cout, NULL);
-
-  for (int x = 0; x < 1000; ++x) {
-    vector<__u32> weight(c->get_max_devices(), 0x10000);
-    vector<int> out;
-    c->do_rule(0, x, out, 3, weight, 0);
-
-    for (unsigned i=0; i<out.size(); ++i) {
-      EXPECT_NE(out[i], CRUSH_ITEM_NONE);
-    }
-    ASSERT_EQ(3, out.size());
-    ASSERT_EQ(0, get_num_dups(out));
-
-    // mark first osd out
-    weight[out[2]] = 0;
-
-    vector<int> out2;
-    c->do_rule(0, x, out2, 3, weight, 0);
-
-    cout << "input " << x
-        << " marked out " << out[0]
-        << " out " << out
-        << " -> out2 " << out2
-        << std::endl;
-
-    ASSERT_EQ(3, out2.size());
-    ASSERT_EQ(0, get_num_dups(out2));
-    for (unsigned i=0; i<out2.size(); ++i) {
-      EXPECT_NE(out2[i], out[2]);
-    }
-    ASSERT_EQ(out2[0], out[0]);
-    ASSERT_EQ(out2[1], out[1]);
-    ASSERT_NE(out2[2], out[2]);
-  }
-}
-
-TEST_P(FirstnTest, out_alt) {
-  std::unique_ptr<CrushWrapper> c(build_firstn_map(cct, 3, 3, 3));
-  vector<__u32> weight(c->get_max_devices(), 0x10000);
-
-  // mark a bunch of osds out
-  int num = 3*3*3;
-  for (int i=0; i<num / 2; ++i)
-    weight[i*2] = 0;
-  c->dump_tree(&cout, NULL);
-
-  // need more retries to get 9/9 hosts for x in 0..99
-  if (!GetParam().is_msr()) {
-    c->set_choose_total_tries(500);
-  }
-  for (int x = 0; x < 100; ++x) {
-    vector<int> out;
-    c->do_rule(0, x, out, 9, weight, 0);
-    cout << x << " -> " << out << std::endl;
-    ASSERT_EQ(9, out.size());
-    ASSERT_EQ(0, get_num_dups(out));
-  }
-}
-
-TEST_P(FirstnTest, out_contig) {
-  std::unique_ptr<CrushWrapper> c(build_firstn_map(cct, 3, 3, 3));
-  vector<__u32> weight(c->get_max_devices(), 0x10000);
-
-  // mark a bunch of osds out
-  int num = 3*3*3;
-  for (int i=0; i<num / 3; ++i)
-    weight[i] = 0;
-  c->dump_tree(&cout, NULL);
-
-  // need more retries to get 7/7 hosts for x in 0..99
-  if (!GetParam().is_msr()) {
-    c->set_choose_total_tries(500);
-  }
-  for (int x = 0; x < 100; ++x) {
-    vector<int> out;
-    c->do_rule(0, x, out, 7, weight, 0);
-    cout << x << " -> " << out << std::endl;
-    ASSERT_EQ(6, out.size());
-    ASSERT_EQ(0, get_num_dups(out));
-  }
-}
-
-TEST_P(FirstnTest, out_progressive) {
-  std::unique_ptr<CrushWrapper> c(build_firstn_map(cct, 3, 3, 3));
-  if (!GetParam().is_msr()) {
-    c->set_choose_total_tries(500);
-  }
-  vector<__u32> tweight(c->get_max_devices(), 0x10000);
-  c->dump_tree(&cout, NULL);
-
-  int tchanged = 0;
-  for (int x = 1; x < 5; ++x) {
-    vector<__u32> weight(c->get_max_devices(), 0x10000);
-
-    std::set<int> prev;
-    for (unsigned i=0; i<weight.size(); ++i) {
-      vector<int> out;
-      c->do_rule(0, x, out, 7, weight, 0);
-      cout << "(" << i << "/" << weight.size() << " out) ";
-      if (i > 0) cout << "marked out " << i - 1 << " ";
-      cout << x << " -> " << out << std::endl;
-
-      ASSERT_EQ(0, get_num_dups(out));
-
-      int changed = 0;
-      for (unsigned j=0; j<out.size(); ++j) {
-       if (i && prev.count(out[j]) == 0) {
-         ++changed;
-         ++tchanged;
-       }
-      }
-      if (changed)
-       cout << " " << changed << " changed" << std::endl;
-      ASSERT_LE(changed, 3);
-
-      // mark another osd out
-      weight[i] = 0;
-      prev = std::set<int>{out.begin(), out.end()};
-    }
-  }
-  cout << tchanged << " total changed" << std::endl;
-}
-
-INSTANTIATE_TEST_SUITE_P(
-  FirstnTest,
-  FirstnTest,
-  ::testing::Values(RuleType(true), RuleType(false)),
-  testing::PrintToStringParamName());
-
-class CRUSHTest : public ::testing::Test
-{
-public:
-  void SetUp() final
-  {
-    CephInitParameters params(CEPH_ENTITY_TYPE_CLIENT);
-    cct = common_preinit(params, CODE_ENVIRONMENT_UTILITY,
-                        CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
-  }
-  void TearDown() final
-  {
-    cct->put();
-    cct = nullptr;
-  }
-protected:
-  CephContext *cct = nullptr;
-};
-
 TEST_F(CRUSHTest, straw_zero) {
   // zero weight items should have no effect on placement.
 
@@ -1103,459 +653,3 @@ TEST_F(CRUSHTest, straw2_reweight) {
     cout << "     vs " << estddev << std::endl;
   }
 }
-
-struct cluster_test_spec_t {
-  const int num_osds_per_host;
-  const int num_hosts;
-
-  const int num_hosts_mapped;
-  const int num_mapped_per_host;
-  const int num_mapped_size;
-
-  const int num_osds;
-
-  cluster_test_spec_t(
-    int num_osds_per_host, int num_hosts,
-    int num_hosts_mapped, int num_mapped_per_host, int num_mapped_size)
-    : num_osds_per_host(num_osds_per_host), num_hosts(num_hosts),
-      num_hosts_mapped(num_hosts_mapped),
-      num_mapped_per_host(num_mapped_per_host),
-      num_mapped_size(num_mapped_size),
-      num_osds(num_osds_per_host * num_hosts) {}
-
-  void validate_osd(int osd) const {
-    EXPECT_GE(osd, 0);
-    EXPECT_LT(osd, num_osds);
-  }
-
-  bool check_osd(int osd) const {
-    return osd >= 0 && osd < num_osds;
-  }
-
-  void validate_host(int host) const {
-    assert(host >= 0);
-    assert(host < num_hosts);
-  }
-
-  std::pair<int, int> host_to_osd_range(int host) const {
-    validate_host(host);
-    auto first = host * num_osds_per_host;
-    return std::make_pair(first, first + num_osds_per_host);
-  }
-
-  int osd_to_host(int osd) const {
-    validate_osd(osd);
-    return osd / num_osds_per_host;
-  }
-};
-
-static constexpr int ROOT_TYPE = 2;
-static constexpr int HOST_TYPE = 1;
-static constexpr int OSD_TYPE = 0;
-std::pair<int, std::unique_ptr<CrushWrapper>> create_crush_heirarchy(
-  CephContext *cct,
-  const cluster_test_spec_t &spec)
-{
-  auto c = std::make_unique<CrushWrapper>();
-  c->create();
-  c->set_tunables_optimal();
-
-  
-  c->set_type_name(ROOT_TYPE, "root");
-  c->set_type_name(HOST_TYPE, "host");
-  c->set_type_name(OSD_TYPE, "osd");
-
-  int rootno;
-  c->add_bucket(0, CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
-              ROOT_TYPE, 0, NULL, NULL, &rootno);
-  c->set_item_name(rootno, "default");
-
-  for (auto host_id = 0; host_id < spec.num_hosts; ++host_id) {
-    const std::string host_name = fmt::format("host{}", host_id);
-    const auto first_host_osd = host_id * spec.num_osds_per_host;
-    const auto next_first_host_osd = first_host_osd + spec.num_osds_per_host;
-    for (auto osd_id = first_host_osd; osd_id < next_first_host_osd; ++osd_id) {
-      const std::string osd_name = fmt::format("osd{}", osd_id);
-      auto ret = c->insert_item(
-       cct, osd_id, 1.0, osd_name,
-       {{ "root", "default"}, {"host", host_name}});
-      EXPECT_EQ(ret, 0);
-    }
-  }
-
-  c->finalize();
-  return std::make_pair(rootno, std::move(c));
-}
-
-std::vector<uint32_t> create_weight_vector(
-  const cluster_test_spec_t &spec)
-{
-  return std::vector<uint32_t>(spec.num_osds, CEPH_OSD_IN);
-}
-
-std::vector<uint32_t> create_weight_vector_first_osd_out(
-  const cluster_test_spec_t &spec,
-  const std::vector<int> &mapping)
-{
-  auto weights = create_weight_vector(spec);
-  spec.validate_osd(mapping[0]);
-  weights[mapping[0]] = CEPH_OSD_OUT;
-  return weights;
-}
-
-std::vector<uint32_t> create_weight_vector_first_host_out(
-  const cluster_test_spec_t &spec,
-  const std::vector<int> &mapping)
-{
-  auto weights = create_weight_vector(spec);
-  const auto [first, end] = spec.host_to_osd_range(spec.osd_to_host(mapping[0]));
-  for (auto i = first; i < end; ++i) {
-    weights[i] = CEPH_OSD_OUT;
-  }
-  return weights;
-}
-
-enum class mapping_change_t {
-  SAME,
-  FAILURE,
-  SAME_HOST,
-  NEW_HOST
-};
-void compare_mappings(
-  const cluster_test_spec_t &spec,
-  const std::vector<int> &before,
-  const std::vector<int> &after,
-  mapping_change_t expectation,
-  const std::pair<int, int> &range)
-{
-  const auto &[begin, end] = range;
-  for (auto i = begin; i < end; ++i) {
-    switch (expectation) {
-    case mapping_change_t::SAME:
-      EXPECT_EQ(before[i], after[i]);
-      break;
-    case mapping_change_t::FAILURE:
-      EXPECT_EQ(CRUSH_ITEM_NONE, after[i]);
-      break;
-    case mapping_change_t::SAME_HOST:
-      EXPECT_NE(before[i], after[i]);
-      if (!spec.check_osd(after[i])) {
-       spec.validate_osd(after[i]);
-      } else {
-       EXPECT_EQ(spec.osd_to_host(before[i]), spec.osd_to_host(after[i]));
-      }
-      break;
-    case mapping_change_t::NEW_HOST:
-      EXPECT_NE(before[i], after[i]);
-      if (!spec.check_osd(after[i])) {
-       spec.validate_osd(after[i]);
-      } else {
-       EXPECT_NE(spec.osd_to_host(before[i]), spec.osd_to_host(after[i]));
-      }
-      break;
-    }
-  }
-}
-
-std::vector<int> get_mapping(
-  const cluster_test_spec_t &spec,
-  CrushWrapper &c,
-  const std::vector<uint32_t> &weights,
-  int ruleno)
-{
-  std::vector<int> out;
-  c.do_rule(
-    ruleno, 0 /* seed */, out, spec.num_mapped_size,
-    weights,
-    0);
-  EXPECT_EQ(std::size(out), spec.num_mapped_size);
-  return out;
-}
-
-unsigned count_mapped(const auto &v) {
-  unsigned ret = 0;
-  for (const auto &i : v) ret += (i != CRUSH_ITEM_NONE);
-  return ret;
-}
-
-TEST_F(CRUSHTest, msr_4_host_2_choose_rule) {
-  cluster_test_spec_t spec{3, 4, 3, 1, 3};
-  auto [rootno, c] = create_crush_heirarchy(cct, spec);
-
-  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
-  EXPECT_EQ(0, c->set_rule_step_take(ruleno, 0, rootno));
-  EXPECT_EQ(
-    0, c->set_rule_step_choose_msr(ruleno, 1, spec.num_hosts_mapped, HOST_TYPE));
-  EXPECT_EQ(
-    0,
-    c->set_rule_step_choose_msr(
-      ruleno, 2, 1, OSD_TYPE));
-  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, 3));
-
-  auto weights_all_in = create_weight_vector(spec);
-  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
-  for (auto i : before) { spec.validate_osd(i); }
-
-  /* MSR test case.  With normal CRUSH, hitting an out osd won't cause
-   * a retry of the previous step, so marking all of the osds on a host
-   * out will not cause positions mapped to that pg to remap.
-   * However, because the above is an MSR rule type, hitting an out osd
-   * will cause a retry of the previous steps as well.
-   * See https://tracker.ceph.com/issues/62214 for the original motivation */
-  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
-  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
-
-  CrushCompiler cc{*c, std::cout};
-  cc.decompile(std::cout);
-
-  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
-  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
-  fmt::print("before        : {}\n", fmt::join(before, ", "));
-  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
-
-  auto count_mapped = [](const auto &v) {
-    unsigned ret = 0;
-    for (const auto &i : v) ret += (i != CRUSH_ITEM_NONE);
-    return ret;
-  };
-
-  EXPECT_EQ(count_mapped(before), count_mapped(after_host_out));
-
-  auto weights_osd_out = create_weight_vector_first_osd_out(spec, before);
-  auto after_osd_out = get_mapping(spec, *c, weights_osd_out, ruleno);
-  EXPECT_EQ(count_mapped(before), count_mapped(after_osd_out));
-}
-
-TEST_F(CRUSHTest, msr_2_host_2_osd) {
-  cluster_test_spec_t spec{2, 3, 2, 2, 3};
-  auto [rootno, c] = create_crush_heirarchy(cct, spec);
-
-  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
-  EXPECT_EQ(0, c->set_rule_step_take(ruleno, 0, rootno));
-  EXPECT_EQ(
-    0, c->set_rule_step_choose_msr(ruleno, 1, spec.num_hosts_mapped, HOST_TYPE));
-  EXPECT_EQ(
-    0,
-    c->set_rule_step_choose_msr(
-      ruleno, 2, spec.num_mapped_per_host, OSD_TYPE));
-  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, 3));
-
-  auto weights_all_in = create_weight_vector(spec);
-  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
-  for (auto i : before) { spec.validate_osd(i); }
-
-  fmt::print("before        : {}\n", fmt::join(before, ", "));
-  ASSERT_EQ(count_mapped(before), 3);
-
-  /* MSR test case.  With normal CRUSH, hitting an out osd won't cause
-   * a retry of the previous step, so marking all of the osds on a host
-   * out will not cause positions mapped to that pg to remap.
-   * However, because the above is an MSR rule type, hitting an out osd
-   * will cause a retry of the previous steps as well.
-   * See https://tracker.ceph.com/issues/62214 for the original motivation */
-  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
-  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
-
-  CrushCompiler cc{*c, std::cout};
-  cc.decompile(std::cout);
-
-  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
-  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
-  fmt::print("before        : {}\n", fmt::join(before, ", "));
-  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
-
-  compare_mappings(
-    spec, before, after_host_out, mapping_change_t::NEW_HOST,
-    {0, spec.num_mapped_per_host});
-  compare_mappings(
-    spec, before, after_host_out, mapping_change_t::SAME,
-    {spec.num_mapped_per_host, spec.num_mapped_size});
-}
-
-TEST_F(CRUSHTest, msr_5_host_8_6_ec_choose) {
-  cluster_test_spec_t spec{4, 5, 4, 4, 14};
-  auto [rootno, c] = create_crush_heirarchy(cct, spec);
-
-  auto ruleno = c->add_rule(-1, 4, CRUSH_RULE_TYPE_MSR_INDEP);
-  unsigned step_id = 0;
-  EXPECT_EQ(0, c->set_rule_step_take(ruleno, step_id++, rootno));
-  EXPECT_EQ(
-    0,
-    c->set_rule_step_choose_msr(
-      ruleno, step_id++, spec.num_hosts_mapped, HOST_TYPE));
-  EXPECT_EQ(
-    0,
-    c->set_rule_step_choose_msr(
-      ruleno, step_id++, spec.num_mapped_per_host, OSD_TYPE));
-  EXPECT_EQ(0, c->set_rule_step_emit(ruleno, step_id++));
-
-  auto weights_all_in = create_weight_vector(spec);
-  auto before = get_mapping(spec, *c, weights_all_in, ruleno);
-  for (auto i : before) { spec.validate_osd(i); }
-
-  /* MSR test case.  With normal CRUSH, hitting an out osd won't cause
-   * a retry of the previous step, so marking all of the osds on a host
-   * out will not cause positions mapped to that pg to remap.
-   * However, because the above is an MSR rule type, hitting an out osd
-   * will cause a retry of the previous steps as well.
-   * See https://tracker.ceph.com/issues/62214 for the original motivation */
-  auto weights_host_out = create_weight_vector_first_host_out(spec, before);
-  auto after_host_out = get_mapping(spec, *c, weights_host_out, ruleno);
-
-  CrushCompiler cc{*c, std::cout};
-  cc.decompile(std::cout);
-
-  fmt::print("weights_all_in: {}\n", fmt::join(weights_all_in, ", "));
-  fmt::print("weights_host_out: {}\n", fmt::join(weights_host_out, ", "));
-  fmt::print("before        : {}\n", fmt::join(before, ", "));
-  fmt::print("after_host_out: {}\n", fmt::join(after_host_out, ", "));
-
-  compare_mappings(
-    spec, before, after_host_out, mapping_change_t::NEW_HOST,
-    {0, spec.num_mapped_per_host});
-  compare_mappings(
-    spec, before, after_host_out, mapping_change_t::SAME,
-    {spec.num_mapped_per_host, spec.num_mapped_size});
-}
-
-TEST_F(CRUSHTest, msr_multi_root) {
-  constexpr unsigned NUM_HOSTS = 4;
-  constexpr unsigned NUM_OSDS_PER_HOST = 3;
-
-  auto c = CrushWrapper();
-  c.create();
-  c.set_tunables_optimal();
-
-  c.set_type_name(ROOT_TYPE, "root");
-  c.set_type_name(HOST_TYPE, "host");
-  c.set_type_name(OSD_TYPE, "osd");
-
-  std::map<int, std::pair<std::string, std::string>> osd_id_to_host_root;
-  std::map<std::string, int> root_name_to_id;
-  std::map<std::string, std::vector<int>> host_name_to_osds;
-  unsigned next_osd_id = 0;
-
-  auto populate_root = [&](const auto &root_name) {
-    int rootno;
-    c.add_bucket(0, CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
-                ROOT_TYPE, 0, NULL, NULL, &rootno);
-    c.set_item_name(rootno, root_name);
-    root_name_to_id[root_name] = rootno;
-
-    for (unsigned host_id = 0; host_id < NUM_HOSTS; ++host_id) {
-      const std::string host_name =
-       fmt::format("{}-host{}", root_name, host_id);
-      for (unsigned osd = 0; osd < NUM_OSDS_PER_HOST; ++osd) {
-       const int osd_id = next_osd_id++;
-       const std::string osd_name = fmt::format("{}-osd{}", root_name, osd_id);
-       auto ret = c.insert_item(
-         cct, osd_id, 1.0, osd_name,
-         {{ "root", root_name }, { "host", host_name }});
-       osd_id_to_host_root[osd_id] = std::make_pair(host_name, root_name);
-       host_name_to_osds[host_name].push_back(osd_id);
-       EXPECT_EQ(ret, 0);
-      }
-    }
-  };
-
-  int ruleno = 0;
-  int ret = c.add_rule(ruleno, 8, CRUSH_RULE_TYPE_MSR_INDEP);
-  ceph_assert(ret == ruleno);
-
-  unsigned step_id = 0;
-  auto populate_rule = [&](const auto &rule_name) {
-    ret = c.set_rule_step(
-      ruleno, step_id++, CRUSH_RULE_TAKE, root_name_to_id[rule_name], 0);
-    ceph_assert(ret == 0);
-    ret = c.set_rule_step(
-      ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 2, HOST_TYPE);
-    ceph_assert(ret == 0);
-    ret = c.set_rule_step(
-      ruleno, step_id++, CRUSH_RULE_CHOOSE_MSR, 2, OSD_TYPE);
-    ceph_assert(ret == 0);
-    ret = c.set_rule_step(ruleno, step_id++, CRUSH_RULE_EMIT, 0, 0);
-    ceph_assert(ret == 0);
-  };
-
-  for (const auto &root_name : { "ssd", "hdd" }) {
-    populate_root(root_name);
-    populate_rule(root_name);
-  }
-  c.set_rule_name(ruleno, "rule_name");
-  c.finalize();
-
-  constexpr unsigned ACTING_SIZE = 8;
-  constexpr unsigned OSDS_PER_ROOT = 4;
-  constexpr unsigned OSDS_PER_HOST = 2;
-  auto validate_output = [&](const auto &out) {
-    std::set<std::string> hosts;
-    for (unsigned host = 0; host < (ACTING_SIZE / OSDS_PER_HOST); ++host) {
-      std::set<std::string> hosts_this_failure_domain;
-      unsigned start = host * OSDS_PER_HOST;
-      unsigned end = (host + 1) * OSDS_PER_HOST;
-      for (unsigned i = start; i < end; ++i) {
-       EXPECT_NE(out[i], CRUSH_ITEM_NONE);
-       EXPECT_EQ(osd_id_to_host_root.count(out[i]), 1);
-       const auto &[host_name, root_name] = osd_id_to_host_root[out[start]];
-       EXPECT_EQ(i < OSDS_PER_ROOT ? "ssd" : "hdd", root_name);
-       hosts_this_failure_domain.insert(host_name);
-      }
-      for (const auto &i: hosts_this_failure_domain) {
-       EXPECT_EQ(hosts.count(i), 0);
-       hosts.insert(i);
-      }
-    }
-  };
-
-  const std::vector<uint32_t> all_in(next_osd_id, CEPH_OSD_IN);
-  for (int x = 0; x < 1000; ++x) {
-    std::vector<int> out;
-    c.do_rule(ruleno, x, out, 8, all_in, 0);
-    EXPECT_EQ(count_mapped(out), 8);
-    validate_output(out);
-
-    {
-      std::vector<uint32_t> osds_out_weight = all_in;
-      std::set<unsigned> osd_idx_out{{1, 5}};
-      for (const auto &i: osd_idx_out) {
-       osds_out_weight[out[i]] = CEPH_OSD_OUT;
-      }
-      std::vector<int> osds_out;
-      c.do_rule(ruleno, x, osds_out, 8, osds_out_weight, 0);
-      EXPECT_EQ(count_mapped(osds_out), 8);
-      validate_output(osds_out);
-      for (unsigned i = 0; i < osds_out.size(); ++i) {
-       if (osd_idx_out.count(i)) {
-         EXPECT_NE(osds_out[i], out[i]);
-       } else {
-         EXPECT_EQ(osds_out[i], out[i]);
-       }
-      }
-    }
-
-    {
-      std::vector<uint32_t> hosts_out_weight = all_in;
-      std::set<unsigned> osd_ids_out;
-
-      for (const auto &i : {2, 6}) {
-       const auto &[host_name, _] = osd_id_to_host_root[out[i]];
-       for (const auto &osd_id: host_name_to_osds[host_name]) {
-         osd_ids_out.insert(osd_id);
-         hosts_out_weight[osd_id] = CEPH_OSD_OUT;
-       }
-      }
-
-      std::vector<int> hosts_out;
-      c.do_rule(ruleno, x, hosts_out, 8, hosts_out_weight, 0);
-      EXPECT_EQ(count_mapped(hosts_out), 8);
-      validate_output(hosts_out);
-      for (unsigned i = 0; i < hosts_out.size(); ++i) {
-       if (osd_ids_out.count(out[i])) {
-         EXPECT_NE(hosts_out[i], out[i]);
-       } else {
-         EXPECT_EQ(hosts_out[i], out[i]);
-       }
-      }
-    }
-  }
-}
index 13155003ab47e00cb0299b5b66f909ee3e367543..0c9ef32377910acc75c99ad238242b60f57760dc 100755 (executable)
@@ -176,9 +176,6 @@ zoned_enabled=0
 io_uring_enabled=0
 with_jaeger=0
 force_addr=0
-osds_per_host=0
-require_osd_and_client_version=""
-use_crush_tunables=""
 
 with_mgr_dashboard=true
 if [[ "$(get_cmake_variable WITH_MGR_DASHBOARD_FRONTEND)" != "ON" ]] ||
@@ -602,21 +599,6 @@ case $1 in
         with_jaeger=1
         echo "with_jaeger $with_jaeger"
         ;;
-    --osds-per-host)
-        osds_per_host="$2"
-        shift
-        echo "osds_per_host $osds_per_host"
-        ;;
-    --require-osd-and-client-version)
-        require_osd_and_client_version="$2"
-        shift
-        echo "require_osd_and_client_version $require_osd_and_client_version"
-        ;;
-    --use-crush-tunables)
-        use_crush_tunables="$2"
-        shift
-        echo "use_crush_tunables $use_crush_tunables"
-        ;;
     *)
         usage_exit
 esac
@@ -1113,15 +1095,6 @@ EOF
     if [ "$crimson" -eq 1 ]; then
         $CEPH_BIN/ceph osd set-allow-crimson --yes-i-really-mean-it
     fi
-
-    if [ -n "$require_osd_and_client_version" ]; then
-        $CEPH_BIN/ceph osd set-require-min-compat-client $require_osd_and_client_version
-        $CEPH_BIN/ceph osd require-osd-release $require_osd_and_client_version --yes-i-really-mean-it
-    fi
-
-    if [ -n "$use_crush_tunables" ]; then
-        $CEPH_BIN/ceph osd crush tunables $use_crush_tunables
-    fi
 }
 
 start_osd() {
@@ -1155,13 +1128,6 @@ start_osd() {
 [osd.$osd]
         host = $HOSTNAME
 EOF
-
-            if [ "$osds_per_host" -gt 0 ]; then
-                wconf <<EOF
-        crush location = root=default host=$HOSTNAME-$(echo "$osd / $osds_per_host" | bc)
-EOF
-            fi
-
             if [ "$spdk_enabled" -eq 1 ]; then
                 wconf <<EOF
         bluestore_block_path = spdk:${bluestore_spdk_dev[$osd]}