osd: PeeringState: fix stretch peering so PGs can go peered but not active
author Greg Farnum <gfarnum@redhat.com>
Fri, 12 Mar 2021 20:13:38 +0000 (20:13 +0000)
committer Greg Farnum <gfarnum@redhat.com>
Fri, 12 Mar 2021 21:51:10 +0000 (21:51 +0000)
I misunderstood, and there was a pretty serious error here: to prevent
accidents, choose_acting() was preventing PGs from *finishing* peering
if they didn't satisfy the stretch cluster rules. What we actually want
is for them to finish peering, but not go active.

Happily, this is easy to fix -- we just add a call to stretch_set_can_peer()
alongside the existing min_size checks when deciding whether to go
PG_STATE_ACTIVE or PG_STATE_PEERED!
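
For illustration, here is a minimal standalone sketch of the resulting rule,
using simplified stand-in types rather than Ceph's PeeringState/pg_pool_t API
(stretch_rules_allow() below is a hypothetical placeholder for
pg_pool_t::stretch_set_can_peer()): peering is allowed to finish either way,
but a PG only goes PG_STATE_ACTIVE when both the min_size check and the
stretch check pass, and otherwise stays PG_STATE_PEERED.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Stand-ins for PG_STATE_PEERED / PG_STATE_ACTIVE.
    enum class PGState { Peered, Active };

    struct PoolInfo {
      std::size_t min_size;
      // Hypothetical placeholder for pg_pool_t::stretch_set_can_peer(); the
      // real check consults the OSDMap and the pool's stretch rules.
      bool stretch_rules_allow(const std::vector<int>& /*acting*/) const {
        return stretch_ok;
      }
      bool stretch_ok = true;
    };

    // A PG may finish peering either way; it goes ACTIVE only when both the
    // min_size check and the stretch-peering check pass.
    PGState choose_post_peering_state(const std::vector<int>& acting,
                                      const PoolInfo& pool) {
      if (acting.size() >= pool.min_size && pool.stretch_rules_allow(acting))
        return PGState::Active;
      return PGState::Peered;  // peered, but not active
    }

    int main() {
      PoolInfo pool{2};          // min_size = 2
      pool.stretch_ok = false;   // e.g. every acting OSD is in one data center
      std::vector<int> acting{0, 1, 2};
      std::cout << (choose_post_peering_state(acting, pool) == PGState::Active
                        ? "ACTIVE" : "PEERED")
                << std::endl;    // prints PEERED: enough replicas, but stretch rules block activation
      return 0;
    }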

Signed-off-by: Greg Farnum <gfarnum@redhat.com>
src/osd/PeeringState.cc

index 804779c1ba9d3c75ed5d3ded5545be7a8f294135..7296e0647cf105d751bd8f9fd379fa6d46d13d72 100644 (file)
@@ -2444,16 +2444,6 @@ bool PeeringState::choose_acting(pg_shard_t &auth_log_shard_id,
     }
     return false;
   }
-  // make sure we respect the stretch cluster rules -- and
-  // didn't break them with earlier choices!
-  const pg_pool_t& pg_pool = pool.info;
-  if (pg_pool.is_stretch_pool()) {
-    stringstream ss;
-    if (!pg_pool.stretch_set_can_peer(want, *get_osdmap(), &ss)) {
-      psdout(5) << "peering blocked by stretch_can_peer: " << ss.str() << dendl;
-      return false;
-    }
-  }
 
   if (request_pg_temp_change_only)
     return true;
@@ -2664,7 +2654,8 @@ void PeeringState::activate(
 
   if (is_primary()) {
     // only update primary last_epoch_started if we will go active
-    if (acting.size() >= pool.info.min_size) {
+    if ((acting.size() >= pool.info.min_size) &&
+        pool.info.stretch_set_can_peer(acting, *get_osdmap(), NULL)) {
       ceph_assert(cct->_conf->osd_find_best_info_ignore_history_les ||
             info.last_epoch_started <= activation_epoch);
       info.last_epoch_started = activation_epoch;
@@ -2965,7 +2956,8 @@ void PeeringState::activate(
     state_set(PG_STATE_ACTIVATING);
     pl->on_activate(std::move(to_trim));
   }
-  if (acting.size() >= pool.info.min_size) {
+  if ((acting.size() >= pool.info.min_size) &&
+      pool.info.stretch_set_can_peer(acting, *get_osdmap(), NULL)) {
     PGLog::LogEntryHandlerRef rollbacker{pl->get_log_handler(t)};
     pg_log.roll_forward(rollbacker.get());
   }
@@ -6230,7 +6222,9 @@ boost::statechart::result PeeringState::Active::react(const AllReplicasActivated
        pl->set_not_ready_to_merge_source(pgid);
       }
     }
-  } else if (ps->acting.size() < ps->pool.info.min_size) {
+  } else if ((ps->acting.size() < ps->pool.info.min_size) ||
+            !ps->pool.info.stretch_set_can_peer(ps->acting,
+                                                *ps->get_osdmap(), NULL)) {
     ps->state_set(PG_STATE_PEERED);
   } else {
     ps->state_set(PG_STATE_ACTIVE);
@@ -6388,7 +6382,9 @@ boost::statechart::result PeeringState::ReplicaActive::react(
     {}, /* lease */
     ps->get_lease_ack());
 
-  if (ps->acting.size() >= ps->pool.info.min_size) {
+  if ((ps->acting.size() >= ps->pool.info.min_size) &&
+      ps->pool.info.stretch_set_can_peer(ps->acting,
+                                        *ps->get_osdmap(), NULL)) {
     ps->state_set(PG_STATE_ACTIVE);
   } else {
     ps->state_set(PG_STATE_PEERED);