git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Optimized EC calculate_maxles_and_minlua needs to use ...
author: Bill Scales <bill_scales@uk.ibm.com>
Fri, 1 Aug 2025 10:48:18 +0000 (11:48 +0100)
committer: Alex Ainscow <aainscow@uk.ibm.com>
Wed, 17 Sep 2025 08:43:26 +0000 (09:43 +0100)
exclude_nonprimary_shards

When an optimized EC pool is searching for the best shard that
isn't a non-primary shard, the calculation of maxles and
minlua needs to exclude non-primary shards.

This bug was seen in a test run where activating a PG was
interrupted by a new epoch, and only a couple of non-primary
shards became active and updated their les. In the next epoch,
a new primary (without a log) failed to find a shard with the
latest les that wasn't a non-primary shard. The les of
non-primary shards should be ignored when looking for
an appropriate shard to get the full log from.

This is safe because an epoch cannot start I/O without
at least K shards that have updated les, and there
are always K-1 non-primary shards, so at least one of
those K shards is not a non-primary shard. If I/O has started
then we will find the latest les even if we skip
non-primary shards. If I/O has not started then the
latest les, ignoring non-primary shards, is the
last epoch in which I/O was started, and a shard with that
les has a good enough log+missing list.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
(cherry picked from commit 72d55eec85afa4c00fac8dc18a1fb49751e61985)

src/osd/PeeringState.cc
src/osd/PeeringState.h

index 33a7d33170bb946608a931fc0b198c553ed63059..b1389470a2bdeeef07f5240b26a32aed71bbc59c 100644 (file)
@@ -1615,6 +1615,7 @@ void PeeringState::reject_reservation()
 void PeeringState::calculate_maxles_and_minlua( const map<pg_shard_t, pg_info_t> &infos,
                                                epoch_t& max_last_epoch_started,
                                                eversion_t& min_last_update_acceptable,
+                                               bool exclude_nonprimary_shards,
                                                bool *history_les_bound) const
 {
   /* See doc/dev/osd_internals/last_epoch_started.rst before attempting
@@ -1622,6 +1623,9 @@ void PeeringState::calculate_maxles_and_minlua( const map<pg_shard_t, pg_info_t>
    * when you find bugs! */
   max_last_epoch_started = 0;
   for (auto i = infos.begin(); i != infos.end(); ++i) {
+    if (exclude_nonprimary_shards &&
+       pool.info.is_nonprimary_shard(shard_id_t(i->first.shard)))
+      continue;
     if (!cct->_conf->osd_find_best_info_ignore_history_les &&
        max_last_epoch_started < i->second.history.last_epoch_started) {
       if (history_les_bound) {
@@ -1665,6 +1669,7 @@ map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
   calculate_maxles_and_minlua( infos,
                               max_last_epoch_started,
                               min_last_update_acceptable,
+                              exclude_nonprimary_shards,
                               history_les_bound);
 
   if (min_last_update_acceptable == eversion_t::max())
index 29ae910ba372acf5aadb1090ccdeaf51c1022e5c..145a031b480e22481a9afc3038237dac9aa5dbb8 100644 (file)
@@ -1689,6 +1689,7 @@ private:
   void calculate_maxles_and_minlua( const std::map<pg_shard_t, pg_info_t> &infos,
                                    epoch_t& max_last_epoch_started,
                                    eversion_t& min_last_update_acceptable,
+                                   bool exclude_nonprimary_shards = false,
                                    bool *history_les_bound = nullptr) const;
 
   // acting std::set