osd: Refactor find_best_info and choose_acting
author    Bill Scales <bill_scales@uk.ibm.com>
          Mon, 28 Jul 2025 08:21:54 +0000 (09:21 +0100)
committer Alex Ainscow <aainscow@uk.ibm.com>
          Wed, 17 Sep 2025 08:43:26 +0000 (09:43 +0100)
Refactor find_best_info to have a separate function to
calculate maxles and minlua. The refactor makes
history_les_bound optional and tidies up the choose_acting
interface, removing this parameter where it is not used.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
(cherry picked from commit f1826fdbf136dc7c96756f0fb8a047c9d9dda82a)

src/osd/PeeringState.cc
src/osd/PeeringState.h
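
The net effect on callers is easiest to see in isolation. The sketch below models the reworked choose_acting call patterns with stand-in types; pg_shard_stub and the stub body are illustrative only, and just the parameter order and defaults mirror the declaration in the PeeringState.h hunk at the end of this diff.

    // Standalone sketch of the reworked choose_acting interface.
    // Types and bodies are stand-ins; only the parameter order and
    // the defaults mirror the PeeringState.h declaration below.
    #include <iostream>

    struct pg_shard_stub {};   // stand-in for pg_shard_t

    bool choose_acting(pg_shard_stub &auth_log_shard,
                       bool restrict_to_up_acting,
                       bool request_pg_temp_change_only = false,
                       bool *history_les_bound = nullptr,
                       bool *repeat_getlog = nullptr)
    {
      // The real logic lives in PeeringState::choose_acting; this stub
      // only demonstrates which arguments each caller must supply.
      return true;
    }

    int main()
    {
      pg_shard_stub auth_log_shard;

      // Recovering/Recovered callers no longer pass a dummy
      // history_les_bound and a nullptr repeat_getlog.
      choose_acting(auth_log_shard, true);

      // Active::react(AdvMap)/(MNotifyRec): only request a pg_temp change.
      choose_acting(auth_log_shard, false, true);

      // GetLog still threads history_les_bound and repeat_getlog through.
      bool history_les_bound = false;
      bool repeat_getlog = false;
      choose_acting(auth_log_shard, false, false,
                    &history_les_bound, &repeat_getlog);

      std::cout << "call patterns compile\n";
      return 0;
    }
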

index 170d94703c964cec504b6fff3f3ba7551c2e5ed1..26b02f3cad3b613fcc8f5a174fcd6123ddaaaac4 100644 (file)
@@ -1605,43 +1605,66 @@ void PeeringState::reject_reservation()
 }
 
 /**
- * find_best_info
+ * calculate_maxles_and_minlua
  *
- * Returns an iterator to the best info in infos sorted by:
- *  1) Prefer newer last_update
- *  2) Prefer longer tail if it brings another info into contiguity
- *  3) Prefer current primary
+ * Calculate max_last_epoch_started and
+ * min_last_update_acceptable
  */
-map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
-  const map<pg_shard_t, pg_info_t> &infos,
-  bool restrict_to_up_acting,
-  bool exclude_nonprimary_shards,
-  bool *history_les_bound) const
+void PeeringState::calculate_maxles_and_minlua( const map<pg_shard_t, pg_info_t> &infos,
+                                               epoch_t& max_last_epoch_started,
+                                               eversion_t& min_last_update_acceptable,
+                                               bool *history_les_bound) const
 {
-  ceph_assert(history_les_bound);
   /* See doc/dev/osd_internals/last_epoch_started.rst before attempting
    * to make changes to this process.  Also, make sure to update it
    * when you find bugs! */
-  epoch_t max_last_epoch_started_found = 0;
+  max_last_epoch_started = 0;
   for (auto i = infos.begin(); i != infos.end(); ++i) {
     if (!cct->_conf->osd_find_best_info_ignore_history_les &&
-       max_last_epoch_started_found < i->second.history.last_epoch_started) {
-      *history_les_bound = true;
-      max_last_epoch_started_found = i->second.history.last_epoch_started;
+       max_last_epoch_started < i->second.history.last_epoch_started) {
+      if (history_les_bound) {
+       *history_les_bound = true;
+      }
+      max_last_epoch_started = i->second.history.last_epoch_started;
     }
     if (!i->second.is_incomplete() &&
-       max_last_epoch_started_found < i->second.last_epoch_started) {
-      *history_les_bound = false;
-      max_last_epoch_started_found = i->second.last_epoch_started;
+       max_last_epoch_started < i->second.last_epoch_started) {
+      if (history_les_bound) {
+       *history_les_bound = false;
+      }
+      max_last_epoch_started = i->second.last_epoch_started;
     }
   }
-  eversion_t min_last_update_acceptable = eversion_t::max();
+  min_last_update_acceptable = eversion_t::max();
   for (auto i = infos.begin(); i != infos.end(); ++i) {
-    if (max_last_epoch_started_found <= i->second.last_epoch_started) {
+    if (max_last_epoch_started <= i->second.last_epoch_started) {
       if (min_last_update_acceptable > i->second.last_update)
        min_last_update_acceptable = i->second.last_update;
     }
   }
+}
+
+/**
+ * find_best_info
+ *
+ * Returns an iterator to the best info in infos sorted by:
+ *  1) Prefer newer last_update
+ *  2) Prefer longer tail if it brings another info into contiguity
+ *  3) Prefer current primary
+ */
+map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
+  const map<pg_shard_t, pg_info_t> &infos,
+  bool restrict_to_up_acting,
+  bool exclude_nonprimary_shards,
+  bool *history_les_bound) const
+{
+  epoch_t max_last_epoch_started;
+  eversion_t min_last_update_acceptable;
+  calculate_maxles_and_minlua( infos,
+                              max_last_epoch_started,
+                              min_last_update_acceptable,
+                              history_les_bound);
+
   if (min_last_update_acceptable == eversion_t::max())
     return infos.end();
 
@@ -1658,7 +1681,7 @@ map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
     if (p->second.last_update < min_last_update_acceptable)
       continue;
     // Disqualify anyone with a too old last_epoch_started
-    if (p->second.last_epoch_started < max_last_epoch_started_found)
+    if (p->second.last_epoch_started < max_last_epoch_started)
       continue;
     // Disqualify anyone who is incomplete (not fully backfilled)
     if (p->second.is_incomplete())
@@ -2462,9 +2485,9 @@ void PeeringState::choose_async_recovery_replicated(
  */
 bool PeeringState::choose_acting(pg_shard_t &auth_log_shard_id,
                                 bool restrict_to_up_acting,
+                                bool request_pg_temp_change_only,
                                 bool *history_les_bound,
-                                bool *repeat_getlog,
-                                bool request_pg_temp_change_only)
+                                bool *repeat_getlog)
 {
   map<pg_shard_t, pg_info_t> all_info(peer_info.begin(), peer_info.end());
   all_info[pg_whoami] = info;
@@ -6246,9 +6269,8 @@ PeeringState::Recovering::react(const RequestBackfill &evt)
   // as backfill might take a long time to complete..
   if (!ps->async_recovery_targets.empty()) {
     pg_shard_t auth_log_shard;
-    bool history_les_bound = false;
     // FIXME: Uh-oh we have to check this return value; choose_acting can fail!
-    ps->choose_acting(auth_log_shard, true, &history_les_bound, nullptr);
+    ps->choose_acting(auth_log_shard, true);
   }
   return transit<WaitLocalBackfillReserved>();
 }
@@ -6321,13 +6343,12 @@ PeeringState::Recovered::Recovered(my_context ctx)
   }
 
   // adjust acting set?  (e.g. because backfill completed...)
-  bool history_les_bound = false;
   if (ps->acting != ps->up &&
-      !ps->choose_acting(auth_log_shard, true, &history_les_bound, nullptr)) {
+      !ps->choose_acting(auth_log_shard, true)) {
     ceph_assert(ps->want_acting.size());
   } else if (!ps->async_recovery_targets.empty()) {
     // FIXME: Uh-oh we have to check this return value; choose_acting can fail!
-    ps->choose_acting(auth_log_shard, true, &history_les_bound, nullptr);
+    ps->choose_acting(auth_log_shard, true);
   }
 
   if (context< Active >().all_replicas_activated  &&
@@ -6473,9 +6494,8 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
     // note that we leave restrict_to_up_acting to false in order to
     // not overkill any chosen stray that is still alive.
     pg_shard_t auth_log_shard;
-    bool history_les_bound = false;
     ps->remove_down_peer_info(advmap.osdmap);
-    ps->choose_acting(auth_log_shard, false, &history_les_bound, nullptr, true);
+    ps->choose_acting(auth_log_shard, false, true);
   }
 
   /* Check for changes in pool size (if the acting set changed as a result,
@@ -6554,9 +6574,8 @@ boost::statechart::result PeeringState::Active::react(const MNotifyRec& notevt)
     // check if it is a previous down acting member that's coming back.
     // if so, request pg_temp change to trigger a new interval transition
     pg_shard_t auth_log_shard;
-    bool history_les_bound = false;
     // FIXME: Uh-oh we have to check this return value; choose_acting can fail!
-    ps->choose_acting(auth_log_shard, false, &history_les_bound, nullptr, true);
+    ps->choose_acting(auth_log_shard, false, true);
     if (!ps->want_acting.empty() && ps->want_acting != ps->acting) {
       psdout(10) << "Active: got notify from previous acting member "
                  << notevt.from << ", requesting pg_temp change"
@@ -7446,7 +7465,7 @@ PeeringState::GetLog::GetLog(my_context ctx)
   ps->log_weirdness();
 
   // adjust acting?
-  if (!ps->choose_acting(auth_log_shard, false,
+  if (!ps->choose_acting(auth_log_shard, false, false,
                         &context< Peering >().history_les_bound,
                          &repeat_getlog)) {
     if (!ps->want_acting.empty()) {
index e756879dd8d6e8e305a1d1af3028de26063f3032..29ae910ba372acf5aadb1090ccdeaf51c1022e5c 100644 (file)
@@ -1686,12 +1686,17 @@ private:
 
   void reject_reservation();
 
+  void calculate_maxles_and_minlua( const std::map<pg_shard_t, pg_info_t> &infos,
+                                   epoch_t& max_last_epoch_started,
+                                   eversion_t& min_last_update_acceptable,
+                                   bool *history_les_bound = nullptr) const;
+
   // acting std::set
   std::map<pg_shard_t, pg_info_t>::const_iterator find_best_info(
     const std::map<pg_shard_t, pg_info_t> &infos,
     bool restrict_to_up_acting,
     bool exclude_nonprimary_shards,
-    bool *history_les_bound) const;
+    bool *history_les_bound = nullptr) const;
 
   static void calc_ec_acting(
     std::map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
@@ -1762,9 +1767,9 @@ private:
   bool recoverable(const std::vector<int> &want) const;
   bool choose_acting(pg_shard_t &auth_log_shard,
                     bool restrict_to_up_acting,
-                    bool *history_les_bound,
-                    bool *repeat_getlog,
-                    bool request_pg_temp_change_only = false);
+                    bool request_pg_temp_change_only = false,
+                    bool *history_les_bound = nullptr,
+                    bool *repeat_getlog = nullptr);
 
   bool search_for_missing(
     const pg_info_t &oinfo, const pg_missing_t &omissing,
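
A condensed, self-contained model of the extracted helper, for following the first hunk without the diff markers. The types here (info_stub, an integer eversion_t) and the ignore_history_les flag are simplifications standing in for pg_info_t, Ceph's eversion_t and the osd_find_best_info_ignore_history_les option; only the control flow mirrors calculate_maxles_and_minlua above.

    // Simplified model of calculate_maxles_and_minlua; stand-in types only.
    #include <cstdint>
    #include <iostream>
    #include <limits>
    #include <map>

    using epoch_t = uint32_t;
    using eversion_t = uint64_t;        // stand-in for Ceph's eversion_t

    struct info_stub {                  // stand-in for pg_info_t
      epoch_t history_last_epoch_started = 0;
      epoch_t last_epoch_started = 0;
      eversion_t last_update = 0;
      bool incomplete = false;
    };

    void calculate_maxles_and_minlua(const std::map<int, info_stub> &infos,
                                     epoch_t &max_last_epoch_started,
                                     eversion_t &min_last_update_acceptable,
                                     bool *history_les_bound = nullptr,
                                     bool ignore_history_les = false)
    {
      // maxles: largest last_epoch_started seen, taken either from history
      // (unless ignored by config) or from a non-incomplete info.
      max_last_epoch_started = 0;
      for (const auto &entry : infos) {
        const info_stub &info = entry.second;
        if (!ignore_history_les &&
            max_last_epoch_started < info.history_last_epoch_started) {
          if (history_les_bound)
            *history_les_bound = true;   // bound came from history
          max_last_epoch_started = info.history_last_epoch_started;
        }
        if (!info.incomplete &&
            max_last_epoch_started < info.last_epoch_started) {
          if (history_les_bound)
            *history_les_bound = false;  // bound came from the info itself
          max_last_epoch_started = info.last_epoch_started;
        }
      }
      // minlua: oldest last_update among infos at or past the max les.
      min_last_update_acceptable = std::numeric_limits<eversion_t>::max();
      for (const auto &entry : infos) {
        const info_stub &info = entry.second;
        if (max_last_epoch_started <= info.last_epoch_started &&
            min_last_update_acceptable > info.last_update)
          min_last_update_acceptable = info.last_update;
      }
    }

    int main()
    {
      std::map<int, info_stub> infos = {
        {0, {5, 5, 40, false}},
        {1, {5, 5, 30, false}},
      };
      epoch_t maxles = 0;
      eversion_t minlua = 0;
      bool les_bound = false;
      calculate_maxles_and_minlua(infos, maxles, minlua, &les_bound);
      std::cout << "maxles=" << maxles << " minlua=" << minlua
                << " les_bound=" << les_bound << "\n";
      // prints: maxles=5 minlua=30 les_bound=1
      return 0;
    }
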