From: Sage Weil
Date: Thu, 2 May 2019 19:34:53 +0000 (-0500)
Subject: osd: clean up osdmap sharing
X-Git-Tag: v15.1.0~2750^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=573913c2e83801e305467cdb58f37d0d8a586f97;p=ceph-ci.git

osd: clean up osdmap sharing

- always use the Session::last_sent_epoch value, both for clients and osds
- get rid of the stl map<> of peer epochs
- consolidate all map sharing into a single maybe_share_map()
- optionally take a lower bound on the peer's epoch, for use when it is
  available (e.g., when we are handling a message that specifies what epoch
  the peer had when it sent the message)
- use const OSDMapRef& where possible
- drop osd->is_active() check, since we no longer have any dependency on
  OSD[Service] state beyond our osdmap

The old callchain was convoluted, partly because it was needlessly
separated into several layers of helpers, and partly because the tracking
for clients and peer OSDs was totally different.

Signed-off-by: Sage Weil
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 4ca945c50a3..174044db0c5 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -226,7 +226,6 @@ OSDService::OSDService(OSD *osd) :
   publish_lock{ceph::make_mutex("OSDService::publish_lock")},
   pre_publish_lock{ceph::make_mutex("OSDService::pre_publish_lock")},
   max_oldest_map(0),
-  peer_map_epoch_lock("OSDService::peer_map_epoch_lock"),
   sched_scrub_lock("OSDService::sched_scrub_lock"), scrubs_pending(0),
   scrubs_active(0),
   agent_lock("OSDService::agent_lock"),
@@ -994,7 +993,7 @@ void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epo
   }
   ConnectionRef peer_con = osd->cluster_messenger->connect_to_osd(
     next_map->get_cluster_addrs(peer));
-  share_map_peer(peer, peer_con.get(), next_map);
+  maybe_share_map(peer_con.get(), next_map);
   peer_con->send_message(m);
   release_map(next_map);
 }
@@ -1163,153 +1162,6 @@ void OSDService::prune_pg_created()
 
 // --------------------------------------
 // dispatch
 
-epoch_t OSDService::get_peer_epoch(int peer)
-{
-  std::lock_guard l(peer_map_epoch_lock);
-  map<int,epoch_t>::iterator p = peer_map_epoch.find(peer);
-  if (p == peer_map_epoch.end())
-    return 0;
-  return p->second;
-}
-
-epoch_t OSDService::note_peer_epoch(int peer, epoch_t e)
-{
-  std::lock_guard l(peer_map_epoch_lock);
-  map<int,epoch_t>::iterator p = peer_map_epoch.find(peer);
-  if (p != peer_map_epoch.end()) {
-    if (p->second < e) {
-      dout(10) << "note_peer_epoch osd." << peer << " has " << e << dendl;
-      p->second = e;
-    } else {
-      dout(30) << "note_peer_epoch osd." << peer << " has " << p->second << " >= " << e << dendl;
-    }
-    return p->second;
-  } else {
-    dout(10) << "note_peer_epoch osd." << peer << " now has " << e << dendl;
-    peer_map_epoch[peer] = e;
-    return e;
-  }
-}
-
-void OSDService::forget_peer_epoch(int peer, epoch_t as_of)
-{
-  std::lock_guard l(peer_map_epoch_lock);
-  map<int,epoch_t>::iterator p = peer_map_epoch.find(peer);
-  if (p != peer_map_epoch.end()) {
-    if (p->second <= as_of) {
-      dout(10) << "forget_peer_epoch osd." << peer << " as_of " << as_of
-               << " had " << p->second << dendl;
-      peer_map_epoch.erase(p);
-    } else {
-      dout(10) << "forget_peer_epoch osd." << peer << " as_of " << as_of
-               << " has " << p->second << " - not forgetting" << dendl;
-    }
-  }
-}
-
-bool OSDService::should_share_map(entity_name_t name, Connection *con,
-                                  epoch_t epoch, const OSDMapRef& osdmap,
-                                  const epoch_t *sent_epoch_p)
-{
-  dout(20) << "should_share_map "
-           << name << " " << con->get_peer_addr()
-           << " " << epoch << dendl;
-
-  // does client have old map?
-  if (name.is_client()) {
-    bool message_sendmap = epoch < osdmap->get_epoch();
-    if (message_sendmap && sent_epoch_p) {
-      dout(20) << "client session last_sent_epoch: "
-               << *sent_epoch_p
-               << " versus osdmap epoch " << osdmap->get_epoch() << dendl;
-      if (*sent_epoch_p < osdmap->get_epoch()) {
-        return true;
-      } // else we don't need to send it out again
-    }
-  }
-
-  if (con->get_messenger() == osd->cluster_messenger &&
-      con != osd->cluster_messenger->get_loopback_connection() &&
-      osdmap->is_up(name.num()) &&
-      (osdmap->get_cluster_addrs(name.num()) == con->get_peer_addrs() ||
-       osdmap->get_hb_back_addrs(name.num()) == con->get_peer_addrs())) {
-    // remember
-    epoch_t has = std::max(get_peer_epoch(name.num()), epoch);
-
-    // share?
-    if (has < osdmap->get_epoch()) {
-      dout(10) << name << " " << con->get_peer_addr()
-               << " has old map " << epoch << " < "
-               << osdmap->get_epoch() << dendl;
-      return true;
-    }
-  }
-
-  return false;
-}
-
-void OSDService::share_map(
-    entity_name_t name,
-    Connection *con,
-    epoch_t epoch,
-    OSDMapRef& osdmap,
-    epoch_t *sent_epoch_p)
-{
-  dout(20) << "share_map "
-           << name << " " << con->get_peer_addr()
-           << " " << epoch << dendl;
-
-  if (!osd->is_active()) {
-    /*It is safe not to proceed as OSD is not in healthy state*/
-    return;
-  }
-
-  bool want_shared = should_share_map(name, con, epoch,
-                                      osdmap, sent_epoch_p);
-
-  if (want_shared){
-    if (name.is_client()) {
-      dout(10) << name << " has old map " << epoch
-               << " < " << osdmap->get_epoch() << dendl;
-      // we know the Session is valid or we wouldn't be sending
-      if (sent_epoch_p) {
-        *sent_epoch_p = osdmap->get_epoch();
-      }
-      send_incremental_map(epoch, con, osdmap);
-    } else if (con->get_messenger() == osd->cluster_messenger &&
-               osdmap->is_up(name.num()) &&
-               (osdmap->get_cluster_addrs(name.num()) == con->get_peer_addrs() ||
-                osdmap->get_hb_back_addrs(name.num()) == con->get_peer_addrs())) {
-      dout(10) << name << " " << con->get_peer_addrs()
-               << " has old map " << epoch << " < "
-               << osdmap->get_epoch() << dendl;
-      note_peer_epoch(name.num(), osdmap->get_epoch());
-      send_incremental_map(epoch, con, osdmap);
-    }
-  }
-}
-
-void OSDService::share_map_peer(int peer, Connection *con, OSDMapRef map)
-{
-  if (!map)
-    map = get_osdmap();
-
-  // send map?
-  epoch_t pe = get_peer_epoch(peer);
-  if (pe) {
-    if (pe < map->get_epoch()) {
-      send_incremental_map(pe, con, map);
-      note_peer_epoch(peer, map->get_epoch());
-    } else
-      dout(20) << "share_map_peer " << con << " already has epoch " << pe << dendl;
-  } else {
-    dout(20) << "share_map_peer " << con << " don't know epoch, doing nothing" << dendl;
-    // no idea about peer's epoch.
-    // ??? send recent ???
-    // do nothing.
-  }
-}
-
 bool OSDService::can_inc_scrubs_pending()
 {
   bool can_inc = false;
@@ -1527,7 +1379,7 @@ void OSDService::send_map(MOSDMap *m, Connection *con)
 }
 
 void OSDService::send_incremental_map(epoch_t since, Connection *con,
-                                      OSDMapRef& osdmap)
+                                      const OSDMapRef& osdmap)
 {
   epoch_t to = osdmap->get_epoch();
   dout(10) << "send_incremental_map " << since << " -> " << to
@@ -5008,11 +4860,10 @@ void OSD::handle_osd_ping(MOSDPing *m)
       m->get_connection()->send_message(r);
 
       if (curmap->is_up(from)) {
-        service.note_peer_epoch(from, m->map_epoch);
        if (is_active()) {
          ConnectionRef con = service.get_con_osd_cluster(from, curmap->get_epoch());
          if (con) {
-           service.share_map_peer(from, con.get(), curmap);
+           service.maybe_share_map(con.get(), get_osdmap(), m->map_epoch);
          }
        }
       } else if (!curmap->exists(from) ||
@@ -5103,11 +4954,10 @@ void OSD::handle_osd_ping(MOSDPing *m)
 
       if (m->map_epoch &&
          curmap->is_up(from)) {
-        service.note_peer_epoch(from, m->map_epoch);
        if (is_active()) {
          ConnectionRef con = service.get_con_osd_cluster(from, curmap->get_epoch());
          if (con) {
-           service.share_map_peer(from, con.get(), curmap);
+           service.maybe_share_map(con.get(), get_osdmap(), m->map_epoch);
          }
        }
       }
@@ -7062,39 +6912,48 @@ bool OSD::ms_dispatch(Message *m)
   return true;
 }
 
-void OSD::maybe_share_map(
-  Session *session,
-  OpRequestRef op,
-  OSDMapRef osdmap)
+void OSDService::maybe_share_map(
+  Connection *con,
+  const OSDMapRef& osdmap,
+  epoch_t peer_epoch_lb)
 {
-  if (!op->check_send_map) {
+  // NOTE: we assume the caller holds something that keeps the Connection itself
+  // pinned (e.g., an OpRequest's MessageRef).
+  auto priv = con->get_priv();
+  auto session = static_cast<Session*>(priv.get());
+  if (!session) {
     return;
   }
 
-  epoch_t last_sent_epoch = 0;
+  // assume the peer has the newer of the op's sent_epoch and what
+  // we think we sent them.
   session->sent_epoch_lock.lock();
-  last_sent_epoch = session->last_sent_epoch;
+  if (peer_epoch_lb > session->last_sent_epoch) {
+    dout(10) << __func__ << " con " << con
+             << " " << con->get_peer_addr()
+             << " map epoch " << session->last_sent_epoch
+             << " -> " << peer_epoch_lb << " (as per caller)" << dendl;
+    session->last_sent_epoch = peer_epoch_lb;
+  }
+  epoch_t last_sent_epoch = session->last_sent_epoch;
   session->sent_epoch_lock.unlock();
 
-  // assume the peer has the newer of the op's sent_epoch and what
-  // we think we sent them.
-  epoch_t from = std::max(last_sent_epoch, op->sent_epoch);
+  if (osdmap->get_epoch() <= last_sent_epoch) {
+    return;
+  }
 
-  const Message *m = op->get_req();
-  service.share_map(
-    m->get_source(),
-    m->get_connection().get(),
-    from,
-    osdmap,
-    session ? &last_sent_epoch : NULL);
+  send_incremental_map(last_sent_epoch, con, osdmap);
+  last_sent_epoch = osdmap->get_epoch();
 
   session->sent_epoch_lock.lock();
   if (session->last_sent_epoch < last_sent_epoch) {
+    dout(10) << __func__ << " con " << con
+             << " " << con->get_peer_addr()
+             << " map epoch " << session->last_sent_epoch
+             << " -> " << last_sent_epoch << " (shared)" << dendl;
     session->last_sent_epoch = last_sent_epoch;
   }
   session->sent_epoch_lock.unlock();
-
-  op->check_send_map = false;
 }
 
 void OSD::dispatch_session_waiting(SessionRef session, OSDMapRef osdmap)
@@ -7729,7 +7588,6 @@ void OSD::note_down_osd(int peer)
 
 void OSD::note_up_osd(int peer)
 {
-  service.forget_peer_epoch(peer, osdmap->get_epoch() - 1);
   heartbeat_set_peers_need_update();
 }
 
@@ -9155,7 +9013,7 @@ void OSD::do_notifies(
               << " (NULL con)" << dendl;
       continue;
     }
-    service.share_map_peer(it->first, con.get(), curmap);
+    service.maybe_share_map(con.get(), curmap);
     dout(7) << __func__ << " osd." << it->first
             << " on " << it->second.size() << " PGs" << dendl;
     MOSDPGNotify *m = new MOSDPGNotify(curmap->get_epoch(),
@@ -9185,7 +9043,7 @@ void OSD::do_queries(map<int, map<spg_t,pg_query_t> >& query_map,
              << " (NULL con)" << dendl;
      continue;
    }
-    service.share_map_peer(who, con.get(), curmap);
+    service.maybe_share_map(con.get(), curmap);
     dout(7) << __func__ << " querying osd." << who
             << " on " << pit->second.size() << " PGs" << dendl;
     MOSDPGQuery *m = new MOSDPGQuery(curmap->get_epoch(),
@@ -9221,7 +9079,7 @@ void OSD::do_infos(map<int,
              << " (NULL con)" << dendl;
      continue;
    }
-    service.share_map_peer(p->first, con.get(), curmap);
+    service.maybe_share_map(con.get(), curmap);
     MOSDPGInfo *m = new MOSDPGInfo(curmap->get_epoch());
     m->pg_list = p->second;
     con->send_message(m);
@@ -9478,7 +9336,7 @@ void OSD::handle_pg_query_nopg(const MQuery& q)
           PastIntervals()));
       m = new MOSDPGNotify(osdmap->get_epoch(), std::move(ls));
     }
-    service.share_map_peer(q.from.osd, con.get(), osdmap);
+    service.maybe_share_map(con.get(), osdmap);
     con->send_message(m);
   }
 }
@@ -9747,10 +9605,9 @@ void OSD::dequeue_op(
 
   logger->tinc(l_osd_op_before_dequeue_op_lat, latency);
 
-  auto priv = op->get_req()->get_connection()->get_priv();
-  if (auto session = static_cast<Session *>(priv.get()); session) {
-    maybe_share_map(session, op, pg->get_osdmap());
-  }
+  service.maybe_share_map(m->get_connection().get(),
+                          pg->get_osdmap(),
+                          op->sent_epoch);
 
   if (pg->is_deleting())
     return;
@@ -10884,10 +10741,9 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb)
               << ", dropping " << qi << dendl;
       // share map with client?
      if (boost::optional<OpRequestRef> _op = qi.maybe_get_op()) {
-       auto priv = (*_op)->get_req()->get_connection()->get_priv();
-       if (auto session = static_cast<Session *>(priv.get()); session) {
-         osd->maybe_share_map(session, *_op, sdata->shard_osdmap);
-       }
+       osd->service.maybe_share_map((*_op)->get_req()->get_connection().get(),
+                                    sdata->shard_osdmap,
+                                    (*_op)->sent_epoch);
      }
      unsigned pushes_to_free = qi.get_reserved_pushes();
      if (pushes_to_free > 0) {
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index d7f334ebdd6..83965cc01a7 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -394,24 +394,15 @@ public:
     return next_osdmap;
   }
 
-private:
-  Mutex peer_map_epoch_lock;
-  map<int, epoch_t> peer_map_epoch;
-public:
-  epoch_t get_peer_epoch(int p);
-  epoch_t note_peer_epoch(int p, epoch_t e);
-  void forget_peer_epoch(int p, epoch_t e);
+  void maybe_share_map(Connection *con,
+                       const OSDMapRef& osdmap,
+                       epoch_t peer_epoch_lb=0);
 
   void send_map(class MOSDMap *m, Connection *con);
-  void send_incremental_map(epoch_t since, Connection *con, OSDMapRef& osdmap);
+  void send_incremental_map(epoch_t since, Connection *con,
+                            const OSDMapRef& osdmap);
   MOSDMap *build_incremental_map_msg(epoch_t from, epoch_t to,
                                      OSDSuperblock& superblock);
-  bool should_share_map(entity_name_t name, Connection *con, epoch_t epoch,
-                        const OSDMapRef& osdmap, const epoch_t *sent_epoch_p);
-  void share_map(entity_name_t name, Connection *con, epoch_t epoch,
-                 OSDMapRef& osdmap, epoch_t *sent_epoch_p);
-  void share_map_peer(int peer, Connection *con,
-                      OSDMapRef map = OSDMapRef());
 
   ConnectionRef get_con_osd_cluster(int peer, epoch_t from_epoch);
   pair<ConnectionRef,ConnectionRef> get_con_osd_hb(int peer, epoch_t from_epoch);  // (back, front)
@@ -1481,7 +1472,6 @@ private:
   // -- sessions --
 private:
   void dispatch_session_waiting(SessionRef session, OSDMapRef osdmap);
-  void maybe_share_map(Session *session, OpRequestRef op, OSDMapRef osdmap);
 
   Mutex session_waiting_lock;
   set<SessionRef> session_waiting_for_map;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index ba6d8730c11..a27b3bf08fe 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -767,7 +767,7 @@ void PG::send_cluster_message(
     return;
 
   if (share_map_update) {
-    osd->share_map_peer(target, con.get(), get_osdmap());
+    osd->maybe_share_map(con.get(), get_osdmap());
  }
  osd->send_message_osd_cluster(m, con.get());
 }
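
For orientation, the consolidated sharing logic described in the commit message boils
down to the following minimal, self-contained sketch. It is not part of the patch:
Session, send_incremental_map(), and the bare epoch values are simplified stand-ins
assumed only for illustration; the real OSDService::maybe_share_map() above also
resolves the Session from the Connection's private data and emits dout logging.

    // sketch of the single maybe_share_map() path used for clients and peer osds
    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <mutex>

    using epoch_t = uint32_t;

    struct Session {               // stand-in for the OSD Session
      std::mutex sent_epoch_lock;  // guards last_sent_epoch
      epoch_t last_sent_epoch = 0; // newest epoch we believe the peer has
    };

    // stand-in for send_incremental_map(last_sent_epoch, con, osdmap)
    static void send_incremental_map(epoch_t since, epoch_t to) {
      std::cout << "sending incremental maps (" << since << ", " << to << "]\n";
    }

    // Raise our notion of the peer's epoch to the caller-supplied lower bound,
    // then share maps only if the peer is still behind the map we are acting on.
    void maybe_share_map(Session& session, epoch_t our_epoch,
                         epoch_t peer_epoch_lb = 0) {
      epoch_t last_sent;
      {
        std::lock_guard<std::mutex> l(session.sent_epoch_lock);
        session.last_sent_epoch = std::max(session.last_sent_epoch, peer_epoch_lb);
        last_sent = session.last_sent_epoch;
      }
      if (our_epoch <= last_sent) {
        return;                    // peer already has (at least) our map
      }
      send_incremental_map(last_sent, our_epoch);  // done outside the lock
      {
        std::lock_guard<std::mutex> l(session.sent_epoch_lock);
        session.last_sent_epoch = std::max(session.last_sent_epoch, our_epoch);
      }
    }

    int main() {
      Session s;
      maybe_share_map(s, 10, 7);   // peer reported epoch 7; we are at 10
      maybe_share_map(s, 10);      // no-op: last_sent_epoch is already 10
    }

The incremental map is sent outside the lock, and the final update takes the maximum
so a concurrent sharer cannot move last_sent_epoch backwards, mirroring the
"if (session->last_sent_epoch < last_sent_epoch)" check in the patch.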