From 72d64260259f958587d15f717499735a0c4ea219 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 6 Aug 2019 14:30:53 -0500 Subject: [PATCH] osd: include PastInterals in pg_notify_t We use a pair everywhere a pg_notify_t is used. This is silly; just make it a member instead. Include some minor compat cruft so we can speak to pre-octopus OSDs. Signed-off-by: Sage Weil --- .../compound_peering_request.cc | 15 ++- src/messages/MOSDPGInfo.h | 28 +++++- src/messages/MOSDPGNotify.h | 30 +++++- src/osd/OSD.cc | 93 ++++++++----------- src/osd/OSD.h | 7 +- src/osd/PGPeeringEvent.h | 9 +- src/osd/PeeringState.cc | 36 ++++--- src/osd/PeeringState.h | 22 ++--- src/osd/osd_types.cc | 16 +++- src/osd/osd_types.h | 61 ++++++------ 10 files changed, 172 insertions(+), 145 deletions(-) diff --git a/src/crimson/osd/osd_operations/compound_peering_request.cc b/src/crimson/osd/osd_operations/compound_peering_request.cc index fee6a8794fa2..43d715e8c044 100644 --- a/src/crimson/osd/osd_operations/compound_peering_request.cc +++ b/src/crimson/osd/osd_operations/compound_peering_request.cc @@ -116,20 +116,18 @@ std::vector handle_pg_notify( std::vector ret; ret.reserve(m->get_pg_list().size()); const int from = m->get_source().num(); - for (auto &p : m->get_pg_list()) { - auto& [pg_notify, past_intervals] = p; + for (auto& pg_notify : m->get_pg_list()) { spg_t pgid{pg_notify.info.pgid.pgid, pg_notify.to}; MNotifyRec notify{pgid, pg_shard_t{from, pg_notify.from}, pg_notify, - 0, // the features is not used - past_intervals}; + 0}; // the features is not used logger().debug("handle_pg_notify on {} from {}", pgid.pgid, from); auto create_info = new PGCreateInfo{ pgid, pg_notify.query_epoch, pg_notify.info.history, - past_intervals, + pg_notify.past_intervals, false}; auto op = osd.get_shard_services().start_operation( state, @@ -157,8 +155,7 @@ std::vector handle_pg_info( std::vector ret; ret.reserve(m->pg_list.size()); const int from = m->get_source().num(); - for (auto &p : m->pg_list) { - auto& pg_notify = p.first; + for (auto& pg_notify : m->pg_list) { spg_t pgid{pg_notify.info.pgid.pgid, pg_notify.to}; logger().debug("handle_pg_info on {} from {}", pgid.pgid, from); MInfoRec info{pg_shard_t{from, pg_notify.from}, @@ -193,8 +190,8 @@ public: from.shard, pgid.shard, evt.get_epoch_sent(), osd.get_shard_services().get_osdmap()->get_epoch(), - empty), - PastIntervals()); + empty, + PastIntervals())); } }; diff --git a/src/messages/MOSDPGInfo.h b/src/messages/MOSDPGInfo.h index 0a4d92b5d63f..a439089bd3a7 100644 --- a/src/messages/MOSDPGInfo.h +++ b/src/messages/MOSDPGInfo.h @@ -21,13 +21,13 @@ class MOSDPGInfo : public Message { private: - static constexpr int HEAD_VERSION = 5; + static constexpr int HEAD_VERSION = 6; static constexpr int COMPAT_VERSION = 5; epoch_t epoch = 0; public: - using pg_list_t = std::vector>; + using pg_list_t = std::vector; pg_list_t pg_list; epoch_t get_epoch() const { return epoch; } @@ -57,7 +57,7 @@ public: ++i) { if (i != pg_list.begin()) out << " "; - out << i->first << "=" << i->second; + out << *i; } out << " epoch " << epoch << ")"; @@ -65,12 +65,34 @@ public: void encode_payload(uint64_t features) override { using ceph::encode; + header.version = HEAD_VERSION; encode(epoch, payload); + if (!HAVE_FEATURE(features, SERVER_OCTOPUS)) { + // pretend to be vector> + header.version = 5; + encode((uint32_t)pg_list.size(), payload); + for (auto& i : pg_list) { + encode(i, payload); // this embeds a dup (ignored) PastIntervals + encode(i.past_intervals, payload); + } + return; + } encode(pg_list, payload); } void decode_payload() override { auto p = payload.cbegin(); decode(epoch, p); + if (header.version == 5) { + // decode legacy vector> + uint32_t num; + decode(num, p); + pg_list.resize(num); + for (unsigned i = 0; i < num; ++i) { + decode(pg_list[i], p); + decode(pg_list[i].past_intervals, p); + } + return; + } decode(pg_list, p); } private: diff --git a/src/messages/MOSDPGNotify.h b/src/messages/MOSDPGNotify.h index dd1f49ce281d..9f25fa242410 100644 --- a/src/messages/MOSDPGNotify.h +++ b/src/messages/MOSDPGNotify.h @@ -25,7 +25,7 @@ class MOSDPGNotify : public Message { private: - static constexpr int HEAD_VERSION = 6; + static constexpr int HEAD_VERSION = 7; static constexpr int COMPAT_VERSION = 6; epoch_t epoch = 0; @@ -33,8 +33,8 @@ private: /// the current epoch if this is not being sent in response to a /// query. This allows the recipient to disregard responses to old /// queries. - using pg_list_t = std::vector>; - pg_list_t pg_list; // pgid -> version + using pg_list_t = std::vector; + pg_list_t pg_list; public: version_t get_epoch() const { return epoch; } @@ -59,13 +59,35 @@ public: void encode_payload(uint64_t features) override { using ceph::encode; + header.version = HEAD_VERSION; encode(epoch, payload); + if (!HAVE_FEATURE(features, SERVER_OCTOPUS)) { + // pretend to be vector> + header.version = 6; + encode((uint32_t)pg_list.size(), payload); + for (auto& i : pg_list) { + encode(i, payload); // this embeds a dup (ignored) PastIntervals + encode(i.past_intervals, payload); + } + return; + } encode(pg_list, payload); } void decode_payload() override { auto p = payload.cbegin(); decode(epoch, p); + if (header.version == 6) { + // decode legacy vector> + uint32_t num; + decode(num, p); + pg_list.resize(num); + for (unsigned i = 0; i < num; ++i) { + decode(pg_list[i], p); + decode(pg_list[i].past_intervals, p); + } + return; + } decode(pg_list, p); } void print(ostream& out) const override { @@ -75,7 +97,7 @@ public: ++i) { if (i != pg_list.begin()) out << " "; - out << i->first << "=" << i->second; + out << *i; } out << " epoch " << epoch << ")"; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index d67002909e9e..1d03bc7a787a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -8906,30 +8906,25 @@ void OSD::dispatch_context(PeeringCtx &ctx, PG *pg, OSDMapRef curmap, */ void OSD::do_notifies( - map > >& notify_list, + map>& notify_list, OSDMapRef curmap) { - for (map > >::iterator it = - notify_list.begin(); - it != notify_list.end(); - ++it) { - if (!curmap->is_up(it->first)) { - dout(20) << __func__ << " skipping down osd." << it->first << dendl; + for (auto& [osd, notifies] : notify_list) { + if (!curmap->is_up(osd)) { + dout(20) << __func__ << " skipping down osd." << osd << dendl; continue; } ConnectionRef con = service.get_con_osd_cluster( - it->first, curmap->get_epoch()); + osd, curmap->get_epoch()); if (!con) { - dout(20) << __func__ << " skipping osd." << it->first - << " (NULL con)" << dendl; + dout(20) << __func__ << " skipping osd." << osd << " (NULL con)" << dendl; continue; } service.maybe_share_map(con.get(), curmap); - dout(7) << __func__ << " osd." << it->first - << " on " << it->second.size() << " PGs" << dendl; + dout(7) << __func__ << " osd." << osd + << " on " << notifies.size() << " PGs" << dendl; MOSDPGNotify *m = new MOSDPGNotify(curmap->get_epoch(), - std::move(it->second)); + std::move(notifies)); con->send_message(m); } } @@ -8965,35 +8960,27 @@ void OSD::do_queries(map >& query_map, } -void OSD::do_infos(map > >& info_map, +void OSD::do_infos(map>& info_map, OSDMapRef curmap) { - for (map > >::iterator p = - info_map.begin(); - p != info_map.end(); - ++p) { - if (!curmap->is_up(p->first)) { - dout(20) << __func__ << " skipping down osd." << p->first << dendl; + for (auto& [osd, notifies] : info_map) { + if (!curmap->is_up(osd)) { + dout(20) << __func__ << " skipping down osd." << osd << dendl; continue; } - for (vector >::iterator i = p->second.begin(); - i != p->second.end(); - ++i) { - dout(20) << __func__ << " sending info " << i->first.info - << " to shard " << p->first << dendl; + for (auto& i : notifies) { + dout(20) << __func__ << " sending info " << i.info + << " to osd " << osd << dendl; } ConnectionRef con = service.get_con_osd_cluster( - p->first, curmap->get_epoch()); + osd, curmap->get_epoch()); if (!con) { - dout(20) << __func__ << " skipping osd." << p->first - << " (NULL con)" << dendl; + dout(20) << __func__ << " skipping osd." << osd << " (NULL con)" << dendl; continue; } service.maybe_share_map(con.get(), curmap); MOSDPGInfo *m = new MOSDPGInfo(curmap->get_epoch()); - m->pg_list = p->second; + m->pg_list = std::move(notifies); con->send_message(m); } info_map.clear(); @@ -9105,24 +9092,23 @@ void OSD::handle_fast_pg_notify(MOSDPGNotify* m) } int from = m->get_source().num(); for (auto& p : m->get_pg_list()) { - spg_t pgid(p.first.info.pgid.pgid, p.first.to); + spg_t pgid(p.info.pgid.pgid, p.to); enqueue_peering_evt( pgid, PGPeeringEventRef( std::make_shared( - p.first.epoch_sent, - p.first.query_epoch, + p.epoch_sent, + p.query_epoch, MNotifyRec( - pgid, pg_shard_t(from, p.first.from), - p.first, - m->get_connection()->get_features(), - p.second), + pgid, pg_shard_t(from, p.from), + p, + m->get_connection()->get_features()), true, new PGCreateInfo( pgid, - p.first.query_epoch, - p.first.info.history, - p.second, + p.query_epoch, + p.info.history, + p.past_intervals, false) ))); } @@ -9139,14 +9125,14 @@ void OSD::handle_fast_pg_info(MOSDPGInfo* m) int from = m->get_source().num(); for (auto& p : m->pg_list) { enqueue_peering_evt( - spg_t(p.first.info.pgid.pgid, p.first.to), + spg_t(p.info.pgid.pgid, p.to), PGPeeringEventRef( std::make_shared( - p.first.epoch_sent, p.first.query_epoch, + p.epoch_sent, p.query_epoch, MInfoRec( - pg_shard_t(from, p.first.from), - p.first.info, - p.first.epoch_sent))) + pg_shard_t(from, p.from), + p.info, + p.epoch_sent))) ); } m->put(); @@ -9237,14 +9223,13 @@ void OSD::handle_pg_query_nopg(const MQuery& q) osdmap->get_epoch(), empty, q.query.epoch_sent); } else { - vector> ls; + vector ls; ls.push_back( - make_pair( - pg_notify_t( - q.query.from, q.query.to, - q.query.epoch_sent, - osdmap->get_epoch(), - empty), + pg_notify_t( + q.query.from, q.query.to, + q.query.epoch_sent, + osdmap->get_epoch(), + empty, PastIntervals())); m = new MOSDPGNotify(osdmap->get_epoch(), std::move(ls)); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 189c645d7da7..6e8ff26ca137 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1876,14 +1876,11 @@ protected: void dispatch_context_transaction(PeeringCtx &ctx, PG *pg, ThreadPool::TPHandle *handle = NULL); void discard_context(PeeringCtx &ctx); - void do_notifies(map > >& - notify_list, + void do_notifies(map>& notify_list, OSDMapRef map); void do_queries(map >& query_map, OSDMapRef map); - void do_infos(map > >& info_map, + void do_infos(map>& info_map, OSDMapRef map); bool require_mon_peer(const Message *m); diff --git a/src/osd/PGPeeringEvent.h b/src/osd/PGPeeringEvent.h index 79855f0c1374..762967e169da 100644 --- a/src/osd/PGPeeringEvent.h +++ b/src/osd/PGPeeringEvent.h @@ -94,14 +94,11 @@ struct MNotifyRec : boost::statechart::event< MNotifyRec > { pg_shard_t from; pg_notify_t notify; uint64_t features; - PastIntervals past_intervals; - MNotifyRec(spg_t p, pg_shard_t from, const pg_notify_t ¬ify, uint64_t f, - const PastIntervals& pi) - : pgid(p), from(from), notify(notify), features(f), past_intervals(pi) {} + MNotifyRec(spg_t p, pg_shard_t from, const pg_notify_t ¬ify, uint64_t f) + : pgid(p), from(from), notify(notify), features(f) {} void print(std::ostream *out) const { *out << "MNotifyRec " << pgid << " from " << from << " notify: " << notify - << " features: 0x" << std::hex << features << std::dec - << " " << past_intervals; + << " features: 0x" << std::hex << features << std::dec; } }; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 1ef59e153741..86e931835c83 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -1935,8 +1935,7 @@ bool PeeringState::search_for_missing( from.shard, pg_whoami.shard, get_osdmap_epoch(), get_osdmap_epoch(), - tinfo), - past_intervals); + tinfo, past_intervals)); } return found_missing; } @@ -2034,9 +2033,7 @@ void PeeringState::activate( ObjectStore::Transaction& t, epoch_t activation_epoch, map >& query_map, - map > > *activator_map, + map> *activator_map, PeeringCtxWrapper &ctx) { ceph_assert(!is_peered()); @@ -2161,8 +2158,8 @@ void PeeringState::activate( peer.shard, pg_whoami.shard, get_osdmap_epoch(), get_osdmap_epoch(), - info), - past_intervals); + info, + past_intervals)); } else { psdout(10) << "activate peer osd." << peer << " is up to date, but sending pg_log anyway" << dendl; @@ -2374,8 +2371,8 @@ void PeeringState::share_pg_info() pg_shard.shard, pg_whoami.shard, get_osdmap_epoch(), get_osdmap_epoch(), - info), - past_intervals); + info, + past_intervals)); pl->send_cluster_message(pg_shard.osd, m, get_osdmap_epoch()); } } @@ -2542,8 +2539,8 @@ void PeeringState::fulfill_query(const MQuery& query, PeeringCtxWrapper &rctx) notify_info.first.shard, pg_whoami.shard, query.query_epoch, get_osdmap_epoch(), - notify_info.second), - past_intervals); + notify_info.second, + past_intervals)); } else { update_history(query.query.history); fulfill_log(query.from, query.query, query.query_epoch); @@ -4080,8 +4077,8 @@ boost::statechart::result PeeringState::Reset::react(const ActMap&) ps->get_primary().shard, ps->pg_whoami.shard, ps->get_osdmap_epoch(), ps->get_osdmap_epoch(), - ps->info), - ps->past_intervals); + ps->info, + ps->past_intervals)); } ps->update_heartbeat_peers(); @@ -5606,7 +5603,8 @@ boost::statechart::result PeeringState::ReplicaActive::react( ps->get_primary().shard, ps->pg_whoami.shard, ps->get_osdmap_epoch(), ps->get_osdmap_epoch(), - ps->info); + ps->info, + PastIntervals()); i.info.history.last_epoch_started = evt.activation_epoch; i.info.history.last_interval_started = i.info.history.same_interval_since; @@ -5616,7 +5614,7 @@ boost::statechart::result PeeringState::ReplicaActive::react( ps->state_set(PG_STATE_PEERED); } - m->pg_list.emplace_back(i, PastIntervals()); + m->pg_list.emplace_back(i); pl->send_cluster_message( ps->get_primary().osd, m, @@ -5664,8 +5662,8 @@ boost::statechart::result PeeringState::ReplicaActive::react(const ActMap&) ps->get_primary().shard, ps->pg_whoami.shard, ps->get_osdmap_epoch(), ps->get_osdmap_epoch(), - ps->info), - ps->past_intervals); + ps->info, + ps->past_intervals)); } return discard_event(); } @@ -5784,8 +5782,8 @@ boost::statechart::result PeeringState::Stray::react(const ActMap&) ps->get_primary().shard, ps->pg_whoami.shard, ps->get_osdmap_epoch(), ps->get_osdmap_epoch(), - ps->info), - ps->past_intervals); + ps->info, + ps->past_intervals)); } return discard_event(); } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 46abe1af0354..627c7f0a6e84 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -52,8 +52,8 @@ class PeeringCtx; // [primary only] content recovery state struct BufferedRecoveryMessages { map > query_map; - map > > info_map; - map > > notify_list; + map> info_map; + map> notify_list; BufferedRecoveryMessages() = default; BufferedRecoveryMessages(PeeringCtx &); @@ -275,8 +275,8 @@ private: struct PeeringCtxWrapper { utime_t start_time; map > &query_map; - map > > &info_map; - map > > ¬ify_list; + map> &info_map; + map> ¬ify_list; ObjectStore::Transaction &transaction; HBHandle * const handle = nullptr; @@ -296,9 +296,8 @@ private: PeeringCtxWrapper(PeeringCtxWrapper &&ctx) = default; - void send_notify(pg_shard_t to, - const pg_notify_t &info, const PastIntervals &pi) { - notify_list[to.osd].emplace_back(info, pi); + void send_notify(pg_shard_t to, const pg_notify_t &n) { + notify_list[to.osd].emplace_back(n); } }; public: @@ -474,7 +473,7 @@ public: return state->rctx->query_map; } - map > > &get_info_map() { + map> &get_info_map() { ceph_assert(state->rctx); return state->rctx->info_map; } @@ -484,10 +483,9 @@ public: return *(state->rctx); } - void send_notify(pg_shard_t to, - const pg_notify_t &info, const PastIntervals &pi) { + void send_notify(pg_shard_t to, const pg_notify_t &n) { ceph_assert(state->rctx); - state->rctx->send_notify(to, info, pi); + state->rctx->send_notify(to, n); } }; friend class PeeringMachine; @@ -1414,7 +1412,7 @@ public: ObjectStore::Transaction& t, epoch_t activation_epoch, map >& query_map, - map > > *activator_map, + map> *activator_map, PeeringCtxWrapper &ctx); void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 12f95a0b6283..98b264687c6a 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -3344,23 +3344,27 @@ void pg_info_t::generate_test_instances(list& o) // -- pg_notify_t -- void pg_notify_t::encode(ceph::buffer::list &bl) const { - ENCODE_START(2, 2, bl); + ENCODE_START(3, 2, bl); encode(query_epoch, bl); encode(epoch_sent, bl); encode(info, bl); encode(to, bl); encode(from, bl); + encode(past_intervals, bl); ENCODE_FINISH(bl); } void pg_notify_t::decode(ceph::buffer::list::const_iterator &bl) { - DECODE_START(2, bl); + DECODE_START(3, bl); decode(query_epoch, bl); decode(epoch_sent, bl); decode(info, bl); decode(to, bl); decode(from, bl); + if (struct_v >= 3) { + decode(past_intervals, bl); + } DECODE_FINISH(bl); } @@ -3375,12 +3379,15 @@ void pg_notify_t::dump(Formatter *f) const info.dump(f); f->close_section(); } + f->dump_object("past_intervals", past_intervals); } void pg_notify_t::generate_test_instances(list& o) { - o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1, pg_info_t())); - o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, pg_info_t())); + o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1, + pg_info_t(), PastIntervals())); + o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, + pg_info_t(), PastIntervals())); } ostream &operator<<(ostream &lhs, const pg_notify_t ¬ify) @@ -3392,6 +3399,7 @@ ostream &operator<<(ostream &lhs, const pg_notify_t ¬ify) notify.to != shard_id_t::NO_SHARD) lhs << " " << (unsigned)notify.from << "->" << (unsigned)notify.to; + lhs << " " << notify.past_intervals; return lhs << ")"; } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index a8662ac5040d..cbde9c437166 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2993,35 +2993,6 @@ struct pg_fast_info_t { WRITE_CLASS_ENCODER(pg_fast_info_t) -struct pg_notify_t { - epoch_t query_epoch; - epoch_t epoch_sent; - pg_info_t info; - shard_id_t to; - shard_id_t from; - pg_notify_t() : - query_epoch(0), epoch_sent(0), to(shard_id_t::NO_SHARD), - from(shard_id_t::NO_SHARD) {} - pg_notify_t( - shard_id_t to, - shard_id_t from, - epoch_t query_epoch, - epoch_t epoch_sent, - const pg_info_t &info) - : query_epoch(query_epoch), - epoch_sent(epoch_sent), - info(info), to(to), from(from) { - ceph_assert(from == info.pgid.shard); - } - void encode(ceph::buffer::list &bl) const; - void decode(ceph::buffer::list::const_iterator &p); - void dump(ceph::Formatter *f) const; - static void generate_test_instances(std::list &o); -}; -WRITE_CLASS_ENCODER(pg_notify_t) -std::ostream &operator<<(std::ostream &lhs, const pg_notify_t ¬ify); - - class OSDMap; /** * PastIntervals -- information needed to determine the PriorSet and @@ -3523,6 +3494,38 @@ PastIntervals::PriorSet::PriorSet( << dendl; } +struct pg_notify_t { + epoch_t query_epoch; + epoch_t epoch_sent; + pg_info_t info; + shard_id_t to; + shard_id_t from; + PastIntervals past_intervals; + pg_notify_t() : + query_epoch(0), epoch_sent(0), to(shard_id_t::NO_SHARD), + from(shard_id_t::NO_SHARD) {} + pg_notify_t( + shard_id_t to, + shard_id_t from, + epoch_t query_epoch, + epoch_t epoch_sent, + const pg_info_t &info, + const PastIntervals& pi) + : query_epoch(query_epoch), + epoch_sent(epoch_sent), + info(info), to(to), from(from), + past_intervals(pi) { + ceph_assert(from == info.pgid.shard); + } + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator &p); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list &o); +}; +WRITE_CLASS_ENCODER(pg_notify_t) +std::ostream &operator<<(std::ostream &lhs, const pg_notify_t ¬ify); + + /** * pg_query_t - used to ask a peer for information about a pg. * -- 2.47.3