PeeringState::get_required_past_interval_bounds takes as a parameter
oldest_epoch, which comes from OSDSuperblock::cluster_osdmap_trim_lower_bound.
We use cluster_osdmap_trim_lower_bound rather than a specific osd's oldest_map
-because we don't necessarily trim all MOSDMap::oldest_map. In order to avoid
-doing too much work at once we limit the amount of osdmaps trimmed using
-``osd_target_transaction_size`` in OSD::trim_maps().
+because we don't necessarily trim all MOSDMap::cluster_osdmap_trim_lower_bound.
+In order to avoid doing too much work at once we limit the amount of osdmaps
+trimmed using ``osd_target_transaction_size`` in OSD::trim_maps().
For this reason, a specific OSD's oldest_map can lag behind
OSDSuperblock::cluster_osdmap_trim_lower_bound
for a while.
uint64_t encode_features = 0;
std::map<epoch_t, ceph::buffer::list> maps;
std::map<epoch_t, ceph::buffer::list> incremental_maps;
- epoch_t oldest_map =0, newest_map = 0;
+ /**
+ * cluster_osdmap_trim_lower_bound
+ *
+ * Encodes a lower bound on the monitor's osdmap trim bound. Recipients
+ * can safely trim up to this bound. The sender stores maps back to
+ * cluster_osdmap_trim_lower_bound.
+ *
+ * This field was formerly named oldest_map and encoded the oldest map
+ * stored by the sender. The primary usage of this field, however, was to
+ * allow the recipient to trim. The secondary usage was to inform the
+ * recipient of how many maps the sender stored in case it needed to request
+ * more. For both purposes, it should be safe for an older OSD to interpret
+ * this field as oldest_map, and it should be safe for a new osd to interpret
+ * the oldest_map field sent by an older osd as
+ * cluster_osdmap_trim_lower_bound.
+ * See bug https://tracker.ceph.com/issues/49689
+ */
+ epoch_t cluster_osdmap_trim_lower_bound = 0;
+ epoch_t newest_map = 0;
epoch_t get_first() const {
epoch_t e = 0;
return e;
}
epoch_t get_oldest() {
- return oldest_map;
+ return cluster_osdmap_trim_lower_bound;
}
epoch_t get_newest() {
return newest_map;
MOSDMap(const uuid_d &f, const uint64_t features)
: Message{CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION},
fsid(f), encode_features(features),
- oldest_map(0), newest_map(0) { }
+ cluster_osdmap_trim_lower_bound(0), newest_map(0) { }
private:
~MOSDMap() final {}
public:
decode(incremental_maps, p);
decode(maps, p);
if (header.version >= 2) {
- decode(oldest_map, p);
+ decode(cluster_osdmap_trim_lower_bound, p);
decode(newest_map, p);
} else {
- oldest_map = 0;
+ cluster_osdmap_trim_lower_bound = 0;
newest_map = 0;
}
if (header.version >= 4) {
encode(incremental_maps, payload);
encode(maps, payload);
if (header.version >= 2) {
- encode(oldest_map, payload);
+ encode(cluster_osdmap_trim_lower_bound, payload);
encode(newest_map, payload);
}
if (header.version >= 4) {
std::string_view get_type_name() const override { return "osdmap"; }
void print(std::ostream& out) const override {
out << "osd_map(" << get_first() << ".." << get_last();
- if (oldest_map || newest_map)
- out << " src has " << oldest_map << ".." << newest_map;
+ if (cluster_osdmap_trim_lower_bound || newest_map)
+ out << " src has " << cluster_osdmap_trim_lower_bound
+ << ".." << newest_map;
out << ")";
}
private:
ceph_assert(r >= 0);
max_bytes -= bl.length();
}
- reply->oldest_map = first;
+ reply->cluster_osdmap_trim_lower_bound = first;
reply->newest_map = last;
mon.send_reply(op, reply);
return true;
{
MOSDMap *r = new MOSDMap(mon.monmap->fsid, features);
get_version_full(osdmap.get_epoch(), features, r->maps[osdmap.get_epoch()]);
- r->oldest_map = get_first_committed();
+ r->cluster_osdmap_trim_lower_bound = get_first_committed();
r->newest_map = osdmap.get_epoch();
return r;
}
dout(10) << "build_incremental [" << from << ".." << to << "] with features "
<< std::hex << features << std::dec << dendl;
MOSDMap *m = new MOSDMap(mon.monmap->fsid, features);
- m->oldest_map = get_first_committed();
+ m->cluster_osdmap_trim_lower_bound = get_first_committed();
m->newest_map = osdmap.get_epoch();
for (epoch_t e = to; e >= from && e > 0; e--) {
if (first < get_first_committed()) {
MOSDMap *m = new MOSDMap(osdmap.get_fsid(), features);
- m->oldest_map = get_first_committed();
+ m->cluster_osdmap_trim_lower_bound = get_first_committed();
m->newest_map = osdmap.get_epoch();
first = get_first_committed();
{
MOSDMap *m = new MOSDMap(monc->get_fsid(),
osdmap->get_encoding_features());
- m->oldest_map = sblock.cluster_osdmap_trim_lower_bound;
+ m->cluster_osdmap_trim_lower_bound = sblock.cluster_osdmap_trim_lower_bound;
m->newest_map = sblock.newest_map;
int max = cct->_conf->osd_map_message_max;
ssize_t max_bytes = cct->_conf->osd_map_message_max_bytes;
- if (since < m->oldest_map) {
+ if (since < m->cluster_osdmap_trim_lower_bound) {
// we don't have the next map the target wants, so start with a
// full map.
bufferlist bl;
<< sblock.cluster_osdmap_trim_lower_bound
<< " > since " << since << ", starting with full map"
<< dendl;
- since = m->oldest_map;
+ since = m->cluster_osdmap_trim_lower_bound;
if (!get_map_bl(since, bl)) {
derr << __func__ << " missing full map " << since << dendl;
goto panic;
// just send latest full map
MOSDMap *m = new MOSDMap(monc->get_fsid(),
osdmap->get_encoding_features());
- m->oldest_map = sblock.cluster_osdmap_trim_lower_bound;
+ m->cluster_osdmap_trim_lower_bound = sblock.cluster_osdmap_trim_lower_bound;
m->newest_map = sblock.newest_map;
get_map_bl(to, m->maps[to]);
send_map(m, con);
epoch_t last = m->get_last();
dout(3) << "handle_osd_map epochs [" << first << "," << last << "], i have "
<< superblock.newest_map
- << ", src has [" << m->oldest_map << "," << m->newest_map << "]"
+ << ", src has [" << m->cluster_osdmap_trim_lower_bound
+ << "," << m->newest_map << "]"
<< dendl;
logger->inc(l_osd_map);
if (first <= superblock.newest_map)
logger->inc(l_osd_mape_dup, superblock.newest_map - first + 1);
- if (superblock.cluster_osdmap_trim_lower_bound < m->oldest_map) {
- superblock.cluster_osdmap_trim_lower_bound = m->oldest_map;
+ if (superblock.cluster_osdmap_trim_lower_bound <
+ m->cluster_osdmap_trim_lower_bound) {
+ superblock.cluster_osdmap_trim_lower_bound =
+ m->cluster_osdmap_trim_lower_bound;
dout(10) << " superblock cluster_osdmap_trim_lower_bound new epoch is: "
<< superblock.cluster_osdmap_trim_lower_bound << dendl;
ceph_assert(
if (first > superblock.newest_map + 1) {
dout(10) << "handle_osd_map message skips epochs "
<< superblock.newest_map + 1 << ".." << (first-1) << dendl;
- if (m->oldest_map <= superblock.newest_map + 1) {
+ if (m->cluster_osdmap_trim_lower_bound <= superblock.newest_map + 1) {
osdmap_subscribe(superblock.newest_map + 1, false);
m->put();
return;
// 1- is good to have
// 2- is at present the only way to ensure that we get a *full* map as
// the first map!
- if (m->oldest_map < first) {
- osdmap_subscribe(m->oldest_map - 1, true);
+ if (m->cluster_osdmap_trim_lower_bound < first) {
+ osdmap_subscribe(m->cluster_osdmap_trim_lower_bound - 1, true);
m->put();
return;
}
if (superblock.oldest_map) {
// make sure we at least keep pace with incoming maps
- trim_maps(m->oldest_map, last - first + 1, skip_maps);
+ trim_maps(m->cluster_osdmap_trim_lower_bound,
+ last - first + 1, skip_maps);
pg_num_history.prune(superblock.oldest_map);
}
}
else if (is_preboot()) {
if (m->get_source().is_mon())
- _preboot(m->oldest_map, m->newest_map);
+ _preboot(m->cluster_osdmap_trim_lower_bound, m->newest_map);
else
start_boot();
}
if (first > osdmap.get_epoch() + 1) {
dout(5) << __func__
<< osdmap.get_epoch() + 1 << ".." << (first-1) << dendl;
- if ((m->oldest_map < first && osdmap.get_epoch() == 0) ||
- m->oldest_map <= osdmap.get_epoch()) {
+ if ((m->cluster_osdmap_trim_lower_bound <
+ first && osdmap.get_epoch() == 0) ||
+ m->cluster_osdmap_trim_lower_bound <=
+ osdmap.get_epoch()) {
monc.sub_want("osdmap", osdmap.get_epoch()+1,
CEPH_SUBSCRIBE_ONETIME);
monc.renew_subs();