From: Bill Scales Date: Fri, 1 Aug 2025 10:48:18 +0000 (+0100) Subject: osd: Optimized EC calculate_maxles_and_minlua needs to use ... X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=43fdccd8911758df6833c1806e4c797b1bced9a8;p=ceph.git osd: Optimized EC calculate_maxles_and_minlua needs to use ... exclude_nonprimary_shards When an optimized EC pool is searching for the best shard that isn't a non-primary shard, then the calculation for maxles and minlua needs to exclude non-primary shards. This bug was seen in a test run where activating a PG was interrupted by a new epoch and only a couple of non-primary shards became active and updated les. In the next epoch, a new primary (without log) failed to find a shard that wasn't non-primary with the latest les. The les of non-primary shards should be ignored when looking for an appropriate shard to get the full log from. This is safe because an epoch cannot start I/O without at least K shards that have updated les, and there are always K-1 non-primary shards. If I/O has started then we will find the latest les even if we skip non-primary shards. If I/O has not started then the latest les ignoring non-primary shards is the last epoch in which I/O was started and has a good enough log+missing list. Signed-off-by: Bill Scales (cherry picked from commit 72d55eec85afa4c00fac8dc18a1fb49751e61985) --- diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 33a7d33170bb9..b1389470a2bde 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -1615,6 +1615,7 @@ void PeeringState::reject_reservation() void PeeringState::calculate_maxles_and_minlua( const map &infos, epoch_t& max_last_epoch_started, eversion_t& min_last_update_acceptable, + bool exclude_nonprimary_shards, bool *history_les_bound) const { /* See doc/dev/osd_internals/last_epoch_started.rst before attempting @@ -1622,6 +1623,9 @@ void PeeringState::calculate_maxles_and_minlua( const map * when you find bugs! 
*/ max_last_epoch_started = 0; for (auto i = infos.begin(); i != infos.end(); ++i) { + if (exclude_nonprimary_shards && + pool.info.is_nonprimary_shard(shard_id_t(i->first.shard))) + continue; if (!cct->_conf->osd_find_best_info_ignore_history_les && max_last_epoch_started < i->second.history.last_epoch_started) { if (history_les_bound) { @@ -1665,6 +1669,7 @@ map::const_iterator PeeringState::find_best_info( calculate_maxles_and_minlua( infos, max_last_epoch_started, min_last_update_acceptable, + exclude_nonprimary_shards, history_les_bound); if (min_last_update_acceptable == eversion_t::max()) diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 29ae910ba372a..145a031b480e2 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -1689,6 +1689,7 @@ private: void calculate_maxles_and_minlua( const std::map &infos, epoch_t& max_last_epoch_started, eversion_t& min_last_update_acceptable, + bool exclude_nonprimary_shards = false, bool *history_les_bound = nullptr) const; // acting std::set