From e534ca5a0576241c4dade6e566acdd5729945f0a Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Fri, 12 Mar 2021 20:13:38 +0000 Subject: [PATCH] osd: PeeringState: fix stretch peering so PGs can go peered but not active I misunderstood and there was a pretty serious error here: to prevent accidents, choose_acting() was preventing PGs from *finishing* peering if they didn't satisfy the stretch cluster rules. What we actually want to do is to finish peering, but not go active. Happily, this is easy to fix -- we just add a call to stretch_set_can_peer() alongside existing min_size checks when we choose whether to go PG_STATE_ACTIVE or PG_STATE_PEERED! Signed-off-by: Greg Farnum --- src/osd/PeeringState.cc | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 804779c1ba9..7296e0647cf 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -2444,16 +2444,6 @@ bool PeeringState::choose_acting(pg_shard_t &auth_log_shard_id, } return false; } - // make sure we respect the stretch cluster rules -- and - // didn't break them with earlier choices! - const pg_pool_t& pg_pool = pool.info; - if (pg_pool.is_stretch_pool()) { - stringstream ss; - if (!pg_pool.stretch_set_can_peer(want, *get_osdmap(), &ss)) { - psdout(5) << "peering blocked by stretch_can_peer: " << ss.str() << dendl; - return false; - } - } if (request_pg_temp_change_only) return true; @@ -2664,7 +2654,8 @@ void PeeringState::activate( if (is_primary()) { // only update primary last_epoch_started if we will go active - if (acting.size() >= pool.info.min_size) { + if ((acting.size() >= pool.info.min_size) && + pool.info.stretch_set_can_peer(acting, *get_osdmap(), NULL)) { ceph_assert(cct->_conf->osd_find_best_info_ignore_history_les || info.last_epoch_started <= activation_epoch); info.last_epoch_started = activation_epoch; @@ -2965,7 +2956,8 @@ void PeeringState::activate( state_set(PG_STATE_ACTIVATING); pl->on_activate(std::move(to_trim)); } - if (acting.size() >= pool.info.min_size) { + if ((acting.size() >= pool.info.min_size) && + pool.info.stretch_set_can_peer(acting, *get_osdmap(), NULL)) { PGLog::LogEntryHandlerRef rollbacker{pl->get_log_handler(t)}; pg_log.roll_forward(rollbacker.get()); } @@ -6230,7 +6222,9 @@ boost::statechart::result PeeringState::Active::react(const AllReplicasActivated pl->set_not_ready_to_merge_source(pgid); } } - } else if (ps->acting.size() < ps->pool.info.min_size) { + } else if ((ps->acting.size() < ps->pool.info.min_size) || + !ps->pool.info.stretch_set_can_peer(ps->acting, + *ps->get_osdmap(), NULL)) { ps->state_set(PG_STATE_PEERED); } else { ps->state_set(PG_STATE_ACTIVE); @@ -6388,7 +6382,9 @@ boost::statechart::result PeeringState::ReplicaActive::react( {}, /* lease */ ps->get_lease_ack()); - if (ps->acting.size() >= ps->pool.info.min_size) { + if ((ps->acting.size() >= ps->pool.info.min_size) && + ps->pool.info.stretch_set_can_peer(ps->acting, + *ps->get_osdmap(), NULL)) { ps->state_set(PG_STATE_ACTIVE); } else { ps->state_set(PG_STATE_PEERED); -- 2.39.5