*remapped*
the PG is temporarily mapped to a different set of OSDs from what
CRUSH specified
+
+*premerge*
+ the PG is in a quiesced-IO state due to an impending PG merge. That
+ happens when pg_num_pending < pg_num, and applies to the PGs with
+ pg_num_pending <= ps < pg_num as well as the corresponding peer PG
+ that it is merging with.
#include "messages/MOSDRepOpReply.h"
#include "messages/MOSDBoot.h"
#include "messages/MOSDPGTemp.h"
+#include "messages/MOSDPGReadyToMerge.h"
#include "messages/MOSDMap.h"
#include "messages/MMonGetOSDMap.h"
}
}
+// ---
+
+void OSDService::set_ready_to_merge_source(PG *pg)
+{
+ Mutex::Locker l(merge_lock);
+ dout(10) << __func__ << " " << pg->pg_id << dendl;
+ ready_to_merge_source.insert(pg->pg_id.pgid);
+ _send_ready_to_merge();
+}
+
+void OSDService::set_ready_to_merge_target(PG *pg)
+{
+ Mutex::Locker l(merge_lock);
+ dout(10) << __func__ << " " << pg->pg_id << dendl;
+ ready_to_merge_target.insert(pg->pg_id.pgid);
+ _send_ready_to_merge();
+}
+
+void OSDService::send_ready_to_merge()
+{
+ Mutex::Locker l(merge_lock);
+ _send_ready_to_merge();
+}
+
+void OSDService::_send_ready_to_merge()
+{
+ for (auto src : ready_to_merge_source) {
+ if (ready_to_merge_target.count(src.get_parent()) &&
+ sent_ready_to_merge_source.count(src) == 0) {
+ monc->send_mon_message(new MOSDPGReadyToMerge(src, osdmap->get_epoch()));
+ sent_ready_to_merge_source.insert(src);
+ }
+ }
+}
+
+void OSDService::clear_ready_to_merge(PG *pg)
+{
+ Mutex::Locker l(merge_lock);
+ ready_to_merge_source.erase(pg->pg_id.pgid);
+ ready_to_merge_target.erase(pg->pg_id.pgid);
+}
+
+void OSDService::clear_sent_ready_to_merge()
+{
+ Mutex::Locker l(merge_lock);
+ sent_ready_to_merge_source.clear();
+}
+
+void OSDService::prune_sent_ready_to_merge(OSDMapRef& osdmap)
+{
+ Mutex::Locker l(merge_lock);
+ auto i = sent_ready_to_merge_source.begin();
+ while (i != sent_ready_to_merge_source.end()) {
+ if (!osdmap->pg_exists(*i)) {
+ dout(10) << __func__ << " " << *i << dendl;
+ i = sent_ready_to_merge_source.erase(i);
+ } else {
+ ++i;
+ }
+ }
+}
+
+// ---
+
void OSDService::_queue_for_recovery(
std::pair<epoch_t, PGRef> p,
uint64_t reserved_pushes)
send_full_update();
send_alive();
service.requeue_pg_temp();
+ service.clear_sent_ready_to_merge();
service.send_pg_temp();
+ service.send_ready_to_merge();
requeue_failures();
send_failures();
AsyncReserver<spg_t> local_reserver;
AsyncReserver<spg_t> remote_reserver;
+ // -- pg merge --
+ Mutex merge_lock = {"OSD::merge_lock"};
+ set<pg_t> ready_to_merge_source;
+ set<pg_t> ready_to_merge_target;
+ set<pg_t> sent_ready_to_merge_source;
+
+ void set_ready_to_merge_source(PG *pg);
+ void set_ready_to_merge_target(PG *pg);
+ void clear_ready_to_merge(PG *pg);
+ void send_ready_to_merge();
+ void _send_ready_to_merge();
+ void clear_sent_ready_to_merge();
+ void prune_sent_ready_to_merge(OSDMapRef& osdmap);
+
// -- pg_temp --
private:
Mutex pg_temp_lock;
unreg_next_scrub();
+ if (is_primary()) {
+ osd->clear_ready_to_merge(this);
+ }
+
pg_shard_t old_acting_primary = get_primary();
pg_shard_t old_up_primary = up_primary;
bool was_old_primary = is_primary();
// deactivate.
state_clear(PG_STATE_ACTIVE);
state_clear(PG_STATE_PEERED);
+ state_clear(PG_STATE_PREMERGE);
state_clear(PG_STATE_DOWN);
state_clear(PG_STATE_RECOVERY_WAIT);
state_clear(PG_STATE_RECOVERY_TOOFULL);
if (pg->is_active()) {
pg->mark_clean();
+ } else if (pg->is_peered()) {
+ bool target;
+ if (pg->pool.info.is_pending_merge(pg->info.pgid.pgid, &target)) {
+ if (target) {
+ ldout(pg->cct, 10) << "ready to merge (target)" << dendl;
+ pg->osd->set_ready_to_merge_target(pg);
+ } else {
+ ldout(pg->cct, 10) << "ready to merge (source)" << dendl;
+ pg->osd->set_ready_to_merge_source(pg);
+ }
+ }
}
pg->state_clear(PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL);
+
pg->share_pg_info();
pg->publish_stats_to_osd();
pg->requeue_ops(pg->waiting_for_clean_to_primary_repair);
pg->state_clear(PG_STATE_ACTIVATING);
pg->state_clear(PG_STATE_CREATING);
- if (pg->acting.size() >= pg->pool.info.min_size) {
- pg->state_set(PG_STATE_ACTIVE);
- } else {
+ pg->state_clear(PG_STATE_PREMERGE);
+
+ if (pg->acting.size() < pg->pool.info.min_size) {
pg->state_set(PG_STATE_PEERED);
+ } else if (pg->pool.info.is_pending_merge(pg->info.pgid.pgid, nullptr)) {
+ pg->state_set(PG_STATE_PEERED);
+ pg->state_set(PG_STATE_PREMERGE);
+ } else {
+ pg->state_set(PG_STATE_ACTIVE);
}
// info.last_epoch_started is set during activate()
oss << "degraded+";
if (state & PG_STATE_REMAPPED)
oss << "remapped+";
+ if (state & PG_STATE_PREMERGE)
+ oss << "premerge+";
if (state & PG_STATE_SCRUBBING)
oss << "scrubbing+";
if (state & PG_STATE_DEEP_SCRUB)
type = PG_STATE_RECOVERY_UNFOUND;
else if (state == "backfill_unfound")
type = PG_STATE_BACKFILL_UNFOUND;
+ else if (state == "premerge")
+ type = PG_STATE_PREMERGE;
else if (state == "scrubbing")
type = PG_STATE_SCRUBBING;
else if (state == "degraded")
return (pg_num_mask + 1) >> 1; // bigger bin (not yet split)
}
+bool pg_pool_t::is_pending_merge(pg_t pgid, bool *target) const
+{
+ if (pg_num_pending >= pg_num) {
+ return false;
+ }
+ if (pgid.ps() >= pg_num_pending && pgid.ps() < pg_num) {
+ if (target) {
+ *target = false;
+ }
+ return true;
+ }
+ for (unsigned ps = pg_num_pending; ps < pg_num; ++ps) {
+ if (pg_t(ps, pgid.pool()).get_parent() == pgid) {
+ if (target) {
+ *target = true;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
/*
* we have two snap modes:
* - pool snaps
#define PG_STATE_DOWN (1ULL << 4) // a needed replica is down, PG offline
#define PG_STATE_RECOVERY_UNFOUND (1ULL << 5) // recovery stopped due to unfound
#define PG_STATE_BACKFILL_UNFOUND (1ULL << 6) // backfill stopped due to unfound
-//#define PG_STATE_SPLITTING (1ULL << 7) // i am splitting
+#define PG_STATE_PREMERGE (1ULL << 7) // i am prepare to merging
#define PG_STATE_SCRUBBING (1ULL << 8) // scrubbing
//#define PG_STATE_SCRUBQ (1ULL << 9) // queued for scrub
#define PG_STATE_DEGRADED (1ULL << 10) // pg contains objects with reduced redundancy
// pool size that it represents.
unsigned get_pg_num_divisor(pg_t pgid) const;
+ bool is_pending_merge(pg_t pgid, bool *target) const;
+
void set_pg_num(int p) {
pg_num = p;
pg_num_pending = p;