From d48d7c9ce56cd201ffbee183296fc899827d622a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 15 Feb 2018 21:26:48 -0600 Subject: [PATCH] mon/OSDMonitor: MOSDPGReadyToMerge to complete a pg_num change This message allows pg_num to be decremented (once the final PGs are ready). Signed-off-by: Sage Weil --- src/messages/MOSDPGReadyToMerge.h | 34 ++++++++++++++ src/mon/Monitor.cc | 1 + src/mon/OSDMonitor.cc | 74 +++++++++++++++++++++++++++++++ src/mon/OSDMonitor.h | 3 ++ src/msg/Message.cc | 4 ++ src/msg/Message.h | 1 + 6 files changed, 117 insertions(+) create mode 100644 src/messages/MOSDPGReadyToMerge.h diff --git a/src/messages/MOSDPGReadyToMerge.h b/src/messages/MOSDPGReadyToMerge.h new file mode 100644 index 0000000000000..f0cb8f1daa764 --- /dev/null +++ b/src/messages/MOSDPGReadyToMerge.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +class MOSDPGReadyToMerge + : public MessageInstance { +public: + pg_t pgid; + + MOSDPGReadyToMerge() + : MessageInstance(MSG_OSD_PG_READY_TO_MERGE, 0) + {} + MOSDPGReadyToMerge(pg_t p, epoch_t e) + : MessageInstance(MSG_OSD_PG_READY_TO_MERGE, e), + pgid(p) + {} + void encode_payload(uint64_t features) override { + using ceph::encode; + paxos_encode(); + encode(pgid, payload); + } + void decode_payload() override { + bufferlist::const_iterator p = payload.begin(); + paxos_decode(p); + decode(pgid, p); + } + const char *get_type_name() const override { return "osd_pg_ready_to_merge"; } + void print(ostream &out) const { + out << get_type_name() + << "(" << pgid + << " v" << version << ")"; + } +}; diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 9b09d8101125e..b575b1b0b1ce8 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -4168,6 +4168,7 @@ void Monitor::dispatch_op(MonOpRequestRef op) case MSG_OSD_PGTEMP: case MSG_OSD_PG_CREATED: case MSG_REMOVE_SNAPS: + case MSG_OSD_PG_READY_TO_MERGE: paxos_service[PAXOS_OSDMAP]->dispatch(op); return; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index ca43c1eebb9fa..3a816024fa32e 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -49,6 +49,7 @@ #include "messages/MOSDPGCreate2.h" #include "messages/MOSDPGCreated.h" #include "messages/MOSDPGTemp.h" +#include "messages/MOSDPGReadyToMerge.h" #include "messages/MMonCommand.h" #include "messages/MRemoveSnaps.h" #include "messages/MOSDScrub.h" @@ -2068,6 +2069,8 @@ bool OSDMonitor::preprocess_query(MonOpRequestRef op) return preprocess_alive(op); case MSG_OSD_PG_CREATED: return preprocess_pg_created(op); + case MSG_OSD_PG_READY_TO_MERGE: + return preprocess_pg_ready_to_merge(op); case MSG_OSD_PGTEMP: return preprocess_pgtemp(op); case MSG_OSD_BEACON: @@ -2107,6 +2110,8 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op) return prepare_pg_created(op); case MSG_OSD_PGTEMP: return prepare_pgtemp(op); + case MSG_OSD_PG_READY_TO_MERGE: + return prepare_pg_ready_to_merge(op); case MSG_OSD_BEACON: return prepare_beacon(op); @@ -3184,6 +3189,75 @@ bool OSDMonitor::prepare_pg_created(MonOpRequestRef op) return true; } +bool OSDMonitor::preprocess_pg_ready_to_merge(MonOpRequestRef op) +{ + op->mark_osdmon_event(__func__); + auto m = static_cast(op->get_req()); + dout(10) << __func__ << " " << *m << dendl; + const pg_pool_t *pi; + auto session = m->get_session(); + if (!session) { + dout(10) << __func__ << ": no monitor session!" << dendl; + goto ignore; + } + if (!session->is_capable("osd", MON_CAP_X)) { + derr << __func__ << " received from entity " + << "with insufficient privileges " << session->caps << dendl; + goto ignore; + } + pi = osdmap.get_pg_pool(m->pgid.pool()); + if (!pi) { + derr << __func__ << " pool for " << m->pgid << " dne" << dendl; + goto ignore; + } + if (pi->get_pg_num() <= m->pgid.ps()) { + dout(20) << " pg_num " << pi->get_pg_num() << " already < " << m->pgid << dendl; + goto ignore; + } + if (pi->get_pg_num() != m->pgid.ps() + 1) { + derr << " OSD trying to merge wrong pgid " << m->pgid << dendl; + goto ignore; + } + if (pi->get_pg_num_pending() > m->pgid.ps()) { + dout(20) << " pg_num_pending " << pi->get_pg_num_pending() << " > " << m->pgid << dendl; + goto ignore; + } + return false; + + ignore: + mon->no_reply(op); + return true; +} + +bool OSDMonitor::prepare_pg_ready_to_merge(MonOpRequestRef op) +{ + op->mark_osdmon_event(__func__); + auto m = static_cast(op->get_req()); + dout(10) << __func__ << " " << *m << dendl; + pg_pool_t p; + if (pending_inc.new_pools.count(m->pgid.pool())) + p = pending_inc.new_pools[m->pgid.pool()]; + else + p = *osdmap.get_pg_pool(m->pgid.pool()); + if (p.get_pg_num() != m->pgid.ps() + 1 || + p.get_pg_num_pending() > m->pgid.ps()) { + dout(10) << __func__ + << " race with concurrent pg_num[_pending] update, will retry" + << dendl; + wait_for_finished_proposal(op, new C_RetryMessage(this, op)); + return true; + } + + p.dec_pg_num(); + p.last_change = pending_inc.epoch; + pending_inc.new_pools[m->pgid.pool()] = p; + + wait_for_finished_proposal(op, new C_ReplyMap(this, op, m->version)); + mon->no_reply(op); + return true; +} + + // ------------- // pg_temp changes diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 4d560811a857b..249e866ccceb2 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -388,6 +388,9 @@ private: bool preprocess_pg_created(MonOpRequestRef op); bool prepare_pg_created(MonOpRequestRef op); + bool preprocess_pg_ready_to_merge(MonOpRequestRef op); + bool prepare_pg_ready_to_merge(MonOpRequestRef op); + int _check_remove_pool(int64_t pool_id, const pg_pool_t &pool, ostream *ss); bool _check_become_tier( int64_t tier_pool_id, const pg_pool_t *tier_pool, diff --git a/src/msg/Message.cc b/src/msg/Message.cc index ac9a175b483ec..3785a01dc452d 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -92,6 +92,7 @@ #include "messages/MOSDPGBackfillRemove.h" #include "messages/MOSDPGRecoveryDelete.h" #include "messages/MOSDPGRecoveryDeleteReply.h" +#include "messages/MOSDPGReadyToMerge.h" #include "messages/MRemoveSnaps.h" @@ -567,6 +568,9 @@ Message *decode_message(CephContext *cct, int crcflags, case MSG_OSD_PG_RECOVERY_DELETE_REPLY: m = MOSDPGRecoveryDeleteReply::create(); break; + case MSG_OSD_PG_READY_TO_MERGE: + m = MOSDPGReadyToMerge::create(); + break; case MSG_OSD_EC_WRITE: m = MOSDECSubOpWrite::create(); break; diff --git a/src/msg/Message.h b/src/msg/Message.h index d298738ca18cb..29c04645a7c7e 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -125,6 +125,7 @@ #define MSG_OSD_PG_CREATE2 120 #define MSG_OSD_SCRUB2 121 +#define MSG_OSD_PG_READY_TO_MERGE 122 // *** MDS *** -- 2.39.5