From: Matan Breizman Date: Wed, 1 Feb 2023 08:49:19 +0000 (+0000) Subject: messages/MOSDMap: Rename oldest_map to cluster_osdmap_trim_lower_bound X-Git-Tag: v16.2.14~69^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6eaa9bdced002cbbe34a016f061e3a3d22ef7984;p=ceph.git messages/MOSDMap: Rename oldest_map to cluster_osdmap_trim_lower_bound Previously, MOSDMap messages sent to other OSDs were populated with the superblocks's oldest_map. We should, instead, use the superblock's cluster_osdmap_trim_lower_bound because oldest map is merely a marker for each osd's trimming progress. As specified in the docs: *** We use cluster_osdmap_trim_lower_bound rather than a specific osd's oldest_map because we don't necessarily trim all MOSDMap::oldest_map. In order to avoid doing too much work at once we limit the amount of osdmaps trimmed using ``osd_target_transaction_size`` in OSD::trim_maps(). For this reason, a specific OSD's oldest_map can lag behind OSDSuperblock::cluster_osdmap_trim_lower_bound for a while. *** Signed-off-by: Matan Breizman (cherry picked from commit 721c9aea1869bf95a167b8879756b4f71fbf41bf) --- diff --git a/doc/dev/osd_internals/past_intervals.rst b/doc/dev/osd_internals/past_intervals.rst index 9d5342de0f96..5b594df1ae0c 100644 --- a/doc/dev/osd_internals/past_intervals.rst +++ b/doc/dev/osd_internals/past_intervals.rst @@ -83,9 +83,9 @@ This dependency also pops up in PeeringState::check_past_interval_bounds(). PeeringState::get_required_past_interval_bounds takes as a parameter oldest_epoch, which comes from OSDSuperblock::cluster_osdmap_trim_lower_bound. We use cluster_osdmap_trim_lower_bound rather than a specific osd's oldest_map -because we don't necessarily trim all MOSDMap::oldest_map. In order to avoid -doing too much work at once we limit the amount of osdmaps trimmed using -``osd_target_transaction_size`` in OSD::trim_maps(). +because we don't necessarily trim all MOSDMap::cluster_osdmap_trim_lower_bound. +In order to avoid doing too much work at once we limit the amount of osdmaps +trimmed using ``osd_target_transaction_size`` in OSD::trim_maps(). For this reason, a specific OSD's oldest_map can lag behind OSDSuperblock::cluster_osdmap_trim_lower_bound for a while. diff --git a/src/messages/MOSDMap.h b/src/messages/MOSDMap.h index 4a57011c44ff..a74873b04014 100644 --- a/src/messages/MOSDMap.h +++ b/src/messages/MOSDMap.h @@ -30,7 +30,25 @@ public: uint64_t encode_features = 0; std::map maps; std::map incremental_maps; - epoch_t oldest_map =0, newest_map = 0; + /** + * cluster_osdmap_trim_lower_bound + * + * Encodes a lower bound on the monitor's osdmap trim bound. Recipients + * can safely trim up to this bound. The sender stores maps back to + * cluster_osdmap_trim_lower_bound. + * + * This field was formerly named oldest_map and encoded the oldest map + * stored by the sender. The primary usage of this field, however, was to + * allow the recipient to trim. The secondary usage was to inform the + * recipient of how many maps the sender stored in case it needed to request + * more. For both purposes, it should be safe for an older OSD to interpret + * this field as oldest_map, and it should be safe for a new osd to interpret + * the oldest_map field sent by an older osd as + * cluster_osdmap_trim_lower_bound. + * See bug https://tracker.ceph.com/issues/49689 + */ + epoch_t cluster_osdmap_trim_lower_bound = 0; + epoch_t newest_map = 0; epoch_t get_first() const { epoch_t e = 0; @@ -51,7 +69,7 @@ public: return e; } epoch_t get_oldest() { - return oldest_map; + return cluster_osdmap_trim_lower_bound; } epoch_t get_newest() { return newest_map; @@ -62,7 +80,7 @@ public: MOSDMap(const uuid_d &f, const uint64_t features) : Message{CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION}, fsid(f), encode_features(features), - oldest_map(0), newest_map(0) { } + cluster_osdmap_trim_lower_bound(0), newest_map(0) { } private: ~MOSDMap() final {} public: @@ -74,10 +92,10 @@ public: decode(incremental_maps, p); decode(maps, p); if (header.version >= 2) { - decode(oldest_map, p); + decode(cluster_osdmap_trim_lower_bound, p); decode(newest_map, p); } else { - oldest_map = 0; + cluster_osdmap_trim_lower_bound = 0; newest_map = 0; } if (header.version >= 4) { @@ -143,7 +161,7 @@ public: encode(incremental_maps, payload); encode(maps, payload); if (header.version >= 2) { - encode(oldest_map, payload); + encode(cluster_osdmap_trim_lower_bound, payload); encode(newest_map, payload); } if (header.version >= 4) { @@ -154,8 +172,9 @@ public: std::string_view get_type_name() const override { return "osdmap"; } void print(std::ostream& out) const override { out << "osd_map(" << get_first() << ".." << get_last(); - if (oldest_map || newest_map) - out << " src has " << oldest_map << ".." << newest_map; + if (cluster_osdmap_trim_lower_bound || newest_map) + out << " src has " << cluster_osdmap_trim_lower_bound + << ".." << newest_map; out << ")"; } private: diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 11dbaf7d7d03..a41c805b5300 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2858,7 +2858,7 @@ bool OSDMonitor::preprocess_get_osdmap(MonOpRequestRef op) ceph_assert(r >= 0); max_bytes -= bl.length(); } - reply->oldest_map = first; + reply->cluster_osdmap_trim_lower_bound = first; reply->newest_map = last; mon.send_reply(op, reply); return true; @@ -4467,7 +4467,7 @@ MOSDMap *OSDMonitor::build_latest_full(uint64_t features) { MOSDMap *r = new MOSDMap(mon.monmap->fsid, features); get_version_full(osdmap.get_epoch(), features, r->maps[osdmap.get_epoch()]); - r->oldest_map = get_first_committed(); + r->cluster_osdmap_trim_lower_bound = get_first_committed(); r->newest_map = osdmap.get_epoch(); return r; } @@ -4477,7 +4477,7 @@ MOSDMap *OSDMonitor::build_incremental(epoch_t from, epoch_t to, uint64_t featur dout(10) << "build_incremental [" << from << ".." << to << "] with features " << std::hex << features << std::dec << dendl; MOSDMap *m = new MOSDMap(mon.monmap->fsid, features); - m->oldest_map = get_first_committed(); + m->cluster_osdmap_trim_lower_bound = get_first_committed(); m->newest_map = osdmap.get_epoch(); for (epoch_t e = to; e >= from && e > 0; e--) { @@ -4555,7 +4555,7 @@ void OSDMonitor::send_incremental(epoch_t first, if (first < get_first_committed()) { MOSDMap *m = new MOSDMap(osdmap.get_fsid(), features); - m->oldest_map = get_first_committed(); + m->cluster_osdmap_trim_lower_bound = get_first_committed(); m->newest_map = osdmap.get_epoch(); first = get_first_committed(); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 86838e9142a4..c8f87a18bd5c 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1415,13 +1415,13 @@ MOSDMap *OSDService::build_incremental_map_msg(epoch_t since, epoch_t to, { MOSDMap *m = new MOSDMap(monc->get_fsid(), osdmap->get_encoding_features()); - m->oldest_map = sblock.cluster_osdmap_trim_lower_bound; + m->cluster_osdmap_trim_lower_bound = sblock.cluster_osdmap_trim_lower_bound; m->newest_map = sblock.newest_map; int max = cct->_conf->osd_map_message_max; ssize_t max_bytes = cct->_conf->osd_map_message_max_bytes; - if (since < m->oldest_map) { + if (since < m->cluster_osdmap_trim_lower_bound) { // we don't have the next map the target wants, so start with a // full map. bufferlist bl; @@ -1429,7 +1429,7 @@ MOSDMap *OSDService::build_incremental_map_msg(epoch_t since, epoch_t to, << sblock.cluster_osdmap_trim_lower_bound << " > since " << since << ", starting with full map" << dendl; - since = m->oldest_map; + since = m->cluster_osdmap_trim_lower_bound; if (!get_map_bl(since, bl)) { derr << __func__ << " missing full map " << since << dendl; goto panic; @@ -1499,7 +1499,7 @@ void OSDService::send_incremental_map(epoch_t since, Connection *con, // just send latest full map MOSDMap *m = new MOSDMap(monc->get_fsid(), osdmap->get_encoding_features()); - m->oldest_map = sblock.cluster_osdmap_trim_lower_bound; + m->cluster_osdmap_trim_lower_bound = sblock.cluster_osdmap_trim_lower_bound; m->newest_map = sblock.newest_map; get_map_bl(to, m->maps[to]); send_map(m, con); @@ -8131,7 +8131,8 @@ void OSD::handle_osd_map(MOSDMap *m) epoch_t last = m->get_last(); dout(3) << "handle_osd_map epochs [" << first << "," << last << "], i have " << superblock.newest_map - << ", src has [" << m->oldest_map << "," << m->newest_map << "]" + << ", src has [" << m->cluster_osdmap_trim_lower_bound + << "," << m->newest_map << "]" << dendl; logger->inc(l_osd_map); @@ -8139,8 +8140,10 @@ void OSD::handle_osd_map(MOSDMap *m) if (first <= superblock.newest_map) logger->inc(l_osd_mape_dup, superblock.newest_map - first + 1); - if (superblock.cluster_osdmap_trim_lower_bound < m->oldest_map) { - superblock.cluster_osdmap_trim_lower_bound = m->oldest_map; + if (superblock.cluster_osdmap_trim_lower_bound < + m->cluster_osdmap_trim_lower_bound) { + superblock.cluster_osdmap_trim_lower_bound = + m->cluster_osdmap_trim_lower_bound; dout(10) << " superblock cluster_osdmap_trim_lower_bound new epoch is: " << superblock.cluster_osdmap_trim_lower_bound << dendl; ceph_assert( @@ -8160,7 +8163,7 @@ void OSD::handle_osd_map(MOSDMap *m) if (first > superblock.newest_map + 1) { dout(10) << "handle_osd_map message skips epochs " << superblock.newest_map + 1 << ".." << (first-1) << dendl; - if (m->oldest_map <= superblock.newest_map + 1) { + if (m->cluster_osdmap_trim_lower_bound <= superblock.newest_map + 1) { osdmap_subscribe(superblock.newest_map + 1, false); m->put(); return; @@ -8169,8 +8172,8 @@ void OSD::handle_osd_map(MOSDMap *m) // 1- is good to have // 2- is at present the only way to ensure that we get a *full* map as // the first map! - if (m->oldest_map < first) { - osdmap_subscribe(m->oldest_map - 1, true); + if (m->cluster_osdmap_trim_lower_bound < first) { + osdmap_subscribe(m->cluster_osdmap_trim_lower_bound - 1, true); m->put(); return; } @@ -8291,7 +8294,8 @@ void OSD::handle_osd_map(MOSDMap *m) if (superblock.oldest_map) { // make sure we at least keep pace with incoming maps - trim_maps(m->oldest_map, last - first + 1, skip_maps); + trim_maps(m->cluster_osdmap_trim_lower_bound, + last - first + 1, skip_maps); pg_num_history.prune(superblock.oldest_map); } @@ -8633,7 +8637,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m) } else if (is_preboot()) { if (m->get_source().is_mon()) - _preboot(m->oldest_map, m->newest_map); + _preboot(m->cluster_osdmap_trim_lower_bound, m->newest_map); else start_boot(); } diff --git a/src/test/mon/test_mon_workloadgen.cc b/src/test/mon/test_mon_workloadgen.cc index 147ea8bcd04b..dcd2ee6e446b 100644 --- a/src/test/mon/test_mon_workloadgen.cc +++ b/src/test/mon/test_mon_workloadgen.cc @@ -798,8 +798,10 @@ class OSDStub : public TestStub if (first > osdmap.get_epoch() + 1) { dout(5) << __func__ << osdmap.get_epoch() + 1 << ".." << (first-1) << dendl; - if ((m->oldest_map < first && osdmap.get_epoch() == 0) || - m->oldest_map <= osdmap.get_epoch()) { + if ((m->cluster_osdmap_trim_lower_bound < + first && osdmap.get_epoch() == 0) || + m->cluster_osdmap_trim_lower_bound <= + osdmap.get_epoch()) { monc.sub_want("osdmap", osdmap.get_epoch()+1, CEPH_SUBSCRIBE_ONETIME); monc.renew_subs();