From b8ac4361be2b05e6fd60cf318c1ea2e930504cbb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 15 Sep 2018 16:36:55 -0500 Subject: [PATCH] osd: cancel pg merge if PGs are undersized If the PG is undersized, cancel the PG merge attempt early. Undersized is a bad thing because it makes merge more dangerous. It's also bad because the PG won't be fully clean when it finishes peering, which means last_epoch_clean can be something far in the past, and past_intervals won't be empty. Since we also take the past_intervals from the source PG, we want to be confident that it is valid. It *should* match up with the target PG since they should have mapped to the same OSDs since they were both clean at the ReadyToMerge point--in fact, they should both be empty. If a PG mapping change snuck in such that they did map somewhere else, though, the same set of mapping changes will have applied to both the source and target, so it should be safe. (It would be better of the mon rejected the ReadyToMerge if the mapping with the latest OSDMap has changed since the message was sent. If we do that the situation is even better, but this change is still appropriate.) Signed-off-by: Sage Weil --- src/osd/OSD.cc | 22 ++++++++++++++++++++++ src/osd/OSD.h | 2 ++ src/osd/PG.cc | 29 ++++++++++++++++++++++------- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 671034e4be8b2..596c23c0ee29e 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1710,6 +1710,14 @@ void OSDService::set_ready_to_merge_target(PG *pg, epoch_t last_epoch_clean) _send_ready_to_merge(); } +void OSDService::set_not_ready_to_merge_source(pg_t pgid) +{ + Mutex::Locker l(merge_lock); + dout(10) << __func__ << " " << pgid << dendl; + not_ready_to_merge_source.insert(pgid); + _send_ready_to_merge(); +} + void OSDService::send_ready_to_merge() { Mutex::Locker l(merge_lock); @@ -1718,7 +1726,20 @@ void OSDService::send_ready_to_merge() void OSDService::_send_ready_to_merge() { + for (auto src : not_ready_to_merge_source) { + if (sent_ready_to_merge_source.count(src) == 0) { + monc->send_mon_message(new MOSDPGReadyToMerge( + src, + 0, + false, + osdmap->get_epoch())); + sent_ready_to_merge_source.insert(src); + } + } for (auto src : ready_to_merge_source) { + if (not_ready_to_merge_source.count(src)) { + continue; + } auto p = ready_to_merge_target.find(src.get_parent()); if (p != ready_to_merge_target.end() && sent_ready_to_merge_source.count(src) == 0) { @@ -1738,6 +1759,7 @@ void OSDService::clear_ready_to_merge(PG *pg) dout(10) << __func__ << " " << pg->pg_id << dendl; ready_to_merge_source.erase(pg->pg_id.pgid); ready_to_merge_target.erase(pg->pg_id.pgid); + not_ready_to_merge_source.erase(pg->pg_id.pgid); } void OSDService::clear_sent_ready_to_merge() diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 96b060d3f6e56..880ef52d1dc5f 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -718,10 +718,12 @@ public: Mutex merge_lock = {"OSD::merge_lock"}; set ready_to_merge_source; map ready_to_merge_target; // pg -> last_epoch_clean + set not_ready_to_merge_source; set sent_ready_to_merge_source; void set_ready_to_merge_source(PG *pg); void set_ready_to_merge_target(PG *pg, epoch_t last_epoch_clean); + void set_not_ready_to_merge_source(pg_t pgid); void clear_ready_to_merge(PG *pg); void send_ready_to_merge(); void _send_ready_to_merge(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index b04b0cfa5a96d..046ba1e9366ec 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2350,15 +2350,15 @@ void PG::try_mark_clean() if (pool.info.is_pending_merge(info.pgid.pgid, &target)) { if (target) { ldout(cct, 10) << "ready to merge (target)" << dendl; - osd->set_ready_to_merge_target(this, - info.history.last_epoch_clean); + osd->set_ready_to_merge_target(this, info.history.last_epoch_clean); } else { ldout(cct, 10) << "ready to merge (source)" << dendl; osd->set_ready_to_merge_source(this); } } } else { -#warning we should back off the merge! + ldout(cct, 10) << "not clean, not ready to merge" << dendl; + // we should have notified OSD in Active state entry point } } @@ -8405,25 +8405,40 @@ boost::statechart::result PG::RecoveryState::Active::react(const QueryState& q) boost::statechart::result PG::RecoveryState::Active::react(const AllReplicasActivated &evt) { PG *pg = context< RecoveryMachine >().pg; + pg_t pgid = pg->info.pgid.pgid; + all_replicas_activated = true; pg->state_clear(PG_STATE_ACTIVATING); pg->state_clear(PG_STATE_CREATING); pg->state_clear(PG_STATE_PREMERGE); - if (pg->acting.size() < pg->pool.info.min_size) { - pg->state_set(PG_STATE_PEERED); - } else if (pg->pool.info.is_pending_merge(pg->info.pgid.pgid, nullptr)) { + bool merge_target; + if (pg->pool.info.is_pending_merge(pgid, &merge_target)) { pg->state_set(PG_STATE_PEERED); pg->state_set(PG_STATE_PREMERGE); + + if (pg->actingset.size() != pg->get_osdmap()->get_pg_size(pgid)) { + if (merge_target) { + pg_t src = pgid; + src.set_ps(pg->pool.info.get_pg_num_pending()); + assert(src.get_parent() == pgid); + pg->osd->set_not_ready_to_merge_source(src); + } else { + pg->osd->set_not_ready_to_merge_source(pgid); + } + } + } else if (pg->acting.size() < pg->pool.info.min_size) { + pg->state_set(PG_STATE_PEERED); } else { pg->state_set(PG_STATE_ACTIVE); } // info.last_epoch_started is set during activate() if (pg->info.history.last_epoch_started == 0) { - pg->osd->send_pg_created(pg->info.pgid.pgid); + pg->osd->send_pg_created(pgid); } + pg->info.history.last_epoch_started = pg->info.last_epoch_started; pg->info.history.last_interval_started = pg->info.last_interval_started; pg->dirty_info = true; -- 2.39.5