]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: cancel pg merge if PGs are undersized
authorSage Weil <sage@redhat.com>
Sat, 15 Sep 2018 21:36:55 +0000 (16:36 -0500)
committerSage Weil <sage@redhat.com>
Thu, 20 Sep 2018 13:35:15 +0000 (08:35 -0500)
If the PG is undersized, cancel the PG merge attempt early.  Undersized is
a bad thing because it makes merge more dangerous.

It's also bad because the PG won't be fully clean when it finishes
peering, which means last_epoch_clean can be something far in the past,
and past_intervals won't be empty.  Since we also take the past_intervals
from the source PG, we want to be confident that it is valid.  It *should*
match up with the target PG since they should have mapped to the same
OSDs since they were both clean at the ReadyToMerge point--in fact, they
should both be empty.  If a PG mapping change snuck in such that they did
map somewhere else, though, the same set of mapping changes will have
applied to both the source and target, so it should be safe.

(It would be better of the mon rejected the ReadyToMerge if the
mapping with the latest OSDMap has changed since the message was sent.
If we do that the situation is even better, but this change is still
appropriate.)

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc

index 671034e4be8b2f8202936dff0e3dae45b5fa784c..596c23c0ee29e8908ed18fb5beeb142b94b233f2 100644 (file)
@@ -1710,6 +1710,14 @@ void OSDService::set_ready_to_merge_target(PG *pg, epoch_t last_epoch_clean)
   _send_ready_to_merge();
 }
 
+void OSDService::set_not_ready_to_merge_source(pg_t pgid)
+{
+  Mutex::Locker l(merge_lock);
+  dout(10) << __func__ << " " << pgid << dendl;
+  not_ready_to_merge_source.insert(pgid);
+  _send_ready_to_merge();
+}
+
 void OSDService::send_ready_to_merge()
 {
   Mutex::Locker l(merge_lock);
@@ -1718,7 +1726,20 @@ void OSDService::send_ready_to_merge()
 
 void OSDService::_send_ready_to_merge()
 {
+  for (auto src : not_ready_to_merge_source) {
+    if (sent_ready_to_merge_source.count(src) == 0) {
+      monc->send_mon_message(new MOSDPGReadyToMerge(
+                              src,
+                              0,
+                              false,
+                              osdmap->get_epoch()));
+      sent_ready_to_merge_source.insert(src);
+    }
+  }
   for (auto src : ready_to_merge_source) {
+    if (not_ready_to_merge_source.count(src)) {
+      continue;
+    }
     auto p = ready_to_merge_target.find(src.get_parent());
     if (p != ready_to_merge_target.end() &&
        sent_ready_to_merge_source.count(src) == 0) {
@@ -1738,6 +1759,7 @@ void OSDService::clear_ready_to_merge(PG *pg)
   dout(10) << __func__ << " " << pg->pg_id << dendl;
   ready_to_merge_source.erase(pg->pg_id.pgid);
   ready_to_merge_target.erase(pg->pg_id.pgid);
+  not_ready_to_merge_source.erase(pg->pg_id.pgid);
 }
 
 void OSDService::clear_sent_ready_to_merge()
index 96b060d3f6e568e15c18ded50279dd1da30efd70..880ef52d1dc5f06bad0d31d118b4b4b48c36beaf 100644 (file)
@@ -718,10 +718,12 @@ public:
   Mutex merge_lock = {"OSD::merge_lock"};
   set<pg_t> ready_to_merge_source;
   map<pg_t,epoch_t> ready_to_merge_target;  // pg -> last_epoch_clean
+  set<pg_t> not_ready_to_merge_source;
   set<pg_t> sent_ready_to_merge_source;
 
   void set_ready_to_merge_source(PG *pg);
   void set_ready_to_merge_target(PG *pg, epoch_t last_epoch_clean);
+  void set_not_ready_to_merge_source(pg_t pgid);
   void clear_ready_to_merge(PG *pg);
   void send_ready_to_merge();
   void _send_ready_to_merge();
index b04b0cfa5a96d40f1b188ab865fe071cbeaab461..046ba1e9366ec1905c5bbcdb14279486bf81c48c 100644 (file)
@@ -2350,15 +2350,15 @@ void PG::try_mark_clean()
       if (pool.info.is_pending_merge(info.pgid.pgid, &target)) {
        if (target) {
          ldout(cct, 10) << "ready to merge (target)" << dendl;
-         osd->set_ready_to_merge_target(this,
-                                        info.history.last_epoch_clean);
+         osd->set_ready_to_merge_target(this, info.history.last_epoch_clean);
        } else {
          ldout(cct, 10) << "ready to merge (source)" << dendl;
          osd->set_ready_to_merge_source(this);
        }
       }
     } else {
-#warning we should back off the merge!
+      ldout(cct, 10) << "not clean, not ready to merge" << dendl;
+      // we should have notified OSD in Active state entry point
     }
   }
 
@@ -8405,25 +8405,40 @@ boost::statechart::result PG::RecoveryState::Active::react(const QueryState& q)
 boost::statechart::result PG::RecoveryState::Active::react(const AllReplicasActivated &evt)
 {
   PG *pg = context< RecoveryMachine >().pg;
+  pg_t pgid = pg->info.pgid.pgid;
+
   all_replicas_activated = true;
 
   pg->state_clear(PG_STATE_ACTIVATING);
   pg->state_clear(PG_STATE_CREATING);
   pg->state_clear(PG_STATE_PREMERGE);
 
-  if (pg->acting.size() < pg->pool.info.min_size) {
-    pg->state_set(PG_STATE_PEERED);
-  } else if (pg->pool.info.is_pending_merge(pg->info.pgid.pgid, nullptr)) {
+  bool merge_target;
+  if (pg->pool.info.is_pending_merge(pgid, &merge_target)) {
     pg->state_set(PG_STATE_PEERED);
     pg->state_set(PG_STATE_PREMERGE);
+
+    if (pg->actingset.size() != pg->get_osdmap()->get_pg_size(pgid)) {
+      if (merge_target) {
+       pg_t src = pgid;
+       src.set_ps(pg->pool.info.get_pg_num_pending());
+       assert(src.get_parent() == pgid);
+       pg->osd->set_not_ready_to_merge_source(src);
+      } else {
+       pg->osd->set_not_ready_to_merge_source(pgid);
+      }
+    }
+  } else if (pg->acting.size() < pg->pool.info.min_size) {
+    pg->state_set(PG_STATE_PEERED);
   } else {
     pg->state_set(PG_STATE_ACTIVE);
   }
 
   // info.last_epoch_started is set during activate()
   if (pg->info.history.last_epoch_started == 0) {
-    pg->osd->send_pg_created(pg->info.pgid.pgid);
+    pg->osd->send_pg_created(pgid);
   }
+
   pg->info.history.last_epoch_started = pg->info.last_epoch_started;
   pg->info.history.last_interval_started = pg->info.last_interval_started;
   pg->dirty_info = true;