From 4f9fdca136bde10479fc173c9bcf4288ea960ace Mon Sep 17 00:00:00 2001
From: Greg Farnum
Date: Thu, 17 Jun 2021 19:56:20 +0000
Subject: [PATCH] mon: Sanely set the default CRUSH rule when creating pools in stretch mode

If we get a pool create request while in stretch mode that does not
explicitly specify a crush rule, look at the stretch-mode pools and their
rules, and select the most common one.

Also update set_up_stretch_mode.sh to add a few more rules that let me
test this locally.

Fixes: https://tracker.ceph.com/issues/51270
Signed-off-by: Greg Farnum
---
 src/mon/OSDMonitor.cc             | 45 +++++++++++++++++++++++++++++--
 src/mon/OSDMonitor.h              |  7 +++++
 src/script/set_up_stretch_mode.sh | 24 +++++++++++++++++
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 829776e05f4..7b573e43bc5 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -7675,6 +7675,43 @@ int OSDMonitor::prepare_pool_stripe_width(const unsigned pool_type,
   return err;
 }
 
+int OSDMonitor::get_replicated_stretch_crush_rule()
+{
+  /* we don't write down the stretch rule anywhere, so
+   * we have to guess it. How? Look at all the pools
+   * and count up how many times a given rule is used
+   * on stretch pools and then return the one with
+   * the most users!
+   */
+  map<int,int> rule_counts;
+  for (const auto& pooli : osdmap.pools) {
+    const pg_pool_t& p = pooli.second;
+    if (p.is_replicated() && p.is_stretch_pool()) {
+      if (!rule_counts.count(p.crush_rule)) {
+        rule_counts[p.crush_rule] = 1;
+      } else {
+        ++rule_counts[p.crush_rule];
+      }
+    }
+  }
+
+  if (rule_counts.empty()) {
+    return -ENOENT;
+  }
+
+  int most_used_count = 0;
+  int most_used_rule = -1;
+  for (auto i : rule_counts) {
+    if (i.second > most_used_count) {
+      most_used_rule = i.first;
+      most_used_count = i.second;
+    }
+  }
+  ceph_assert(most_used_count > 0);
+  ceph_assert(most_used_rule >= 0);
+  return most_used_rule;
+}
+
 int OSDMonitor::prepare_pool_crush_rule(const unsigned pool_type,
                                         const string &erasure_code_profile,
                                         const string &rule_name,
@@ -7687,8 +7724,12 @@ int OSDMonitor::prepare_pool_crush_rule(const unsigned pool_type,
   case pg_pool_t::TYPE_REPLICATED:
     {
       if (rule_name == "") {
-        // Use default rule
-        *crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(cct);
+        if (osdmap.stretch_mode_enabled) {
+          *crush_rule = get_replicated_stretch_crush_rule();
+        } else {
+          // Use default rule
+          *crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(cct);
+        }
         if (*crush_rule < 0) {
           // Errors may happen e.g. if no valid rule is available
           *ss << "No suitable CRUSH rule exists, check "
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 3532f8f76e1..599fbc1ac4f 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -858,6 +858,13 @@ public:
    * Sets the osdmap and pg_pool_t values back to healthy stretch mode status.
    */
   void trigger_healthy_stretch_mode();
+  /**
+   * Obtain the crush rule being used for stretch pools.
+   * Note that right now this is heuristic and simply selects the
+   * most-used rule on replicated stretch pools.
+   * @return the crush rule ID, or a negative errno
+   */
+  int get_replicated_stretch_crush_rule();
 private:
   utime_t stretch_recovery_triggered; // what time we committed a switch to recovery mode
 };
diff --git a/src/script/set_up_stretch_mode.sh b/src/script/set_up_stretch_mode.sh
index a9cfd7daa3b..a28f181e6c6 100755
--- a/src/script/set_up_stretch_mode.sh
+++ b/src/script/set_up_stretch_mode.sh
@@ -24,6 +24,30 @@ rule stretch_rule {
         step chooseleaf firstn 2 type host
         step emit
 }
+rule stretch_rule2 {
+        id 2
+        type replicated
+        min_size 1
+        max_size 10
+        step take site1
+        step chooseleaf firstn 2 type host
+        step emit
+        step take site2
+        step chooseleaf firstn 2 type host
+        step emit
+}
+rule stretch_rule3 {
+        id 3
+        type replicated
+        min_size 1
+        max_size 10
+        step take site1
+        step chooseleaf firstn 2 type host
+        step emit
+        step take site2
+        step chooseleaf firstn 2 type host
+        step emit
+}
 EOF
 ./bin/crushtool -c crush.map.txt -o crush2.map.bin
 ./bin/ceph osd setcrushmap -i crush2.map.bin
-- 
2.39.5