}
// encode into pending incremental
+ uint64_t features = newmap.get_encoding_features();
newmap.encode(pending_inc.fullmap,
- mon->get_quorum_con_features() | CEPH_FEATURE_RESERVED);
+ features | CEPH_FEATURE_RESERVED);
pending_inc.full_crc = newmap.get_crc();
dout(20) << " full crc " << pending_inc.full_crc << dendl;
}
// determine appropriate features
features = tmp.get_encoding_features();
- dout(10) << __func__ << " encoding full map with " << features << dendl;
+ dout(10) << __func__ << " encoding full map with "
+ << ceph_release_name(tmp.require_osd_release)
+ << " features " << features << dendl;
// the features should be a subset of the mon quorum's features!
assert((features & ~mon->get_quorum_con_features()) == 0);
}
dout(10) << "committed, telling random " << s->inst << " all about it" << dendl;
+
+ // get the features of the peer;
+ // use quorum_con_features if it's an anonymous connection.
+ uint64_t features = s->con_features ? s->con_features :
+ mon->get_quorum_con_features();
// whatev, they'll request more if they need it
- MOSDMap *m = build_incremental(osdmap.get_epoch() - 1, osdmap.get_epoch());
+ MOSDMap *m = build_incremental(osdmap.get_epoch() - 1, osdmap.get_epoch(), features);
s->con->send_message(m);
// NOTE: do *not* record osd has up to this epoch (as we do
// elsewhere) as they may still need to request older values.
{
op->mark_osdmon_event(__func__);
MMonGetOSDMap *m = static_cast<MMonGetOSDMap*>(op->get_req());
+
+ uint64_t features = mon->get_quorum_con_features();
+ if (m->get_session() && m->get_session()->con_features)
+ features = m->get_session()->con_features;
+
dout(10) << __func__ << " " << *m << dendl;
MOSDMap *reply = new MOSDMap(mon->monmap->fsid);
epoch_t first = get_first_committed();
for (epoch_t e = std::max(first, m->get_full_first());
e <= std::min(last, m->get_full_last()) && max > 0;
++e, --max) {
- int r = get_version_full(e, reply->maps[e]);
+ int r = get_version_full(e, features, reply->maps[e]);
assert(r >= 0);
}
for (epoch_t e = std::max(first, m->get_inc_first());
e <= std::min(last, m->get_inc_last()) && max > 0;
++e, --max) {
- int r = get_version(e, reply->incremental_maps[e]);
+ int r = get_version(e, features, reply->incremental_maps[e]);
assert(r >= 0);
}
reply->oldest_map = first;
}
-MOSDMap *OSDMonitor::build_latest_full()
+/// Build an MOSDMap message carrying the full map of the current osdmap epoch.
+///
+/// @param features  feature bits of the receiving peer, forwarded to
+///                  get_version_full() to pick the encoding.  A value of 0
+///                  is replaced by mon->get_quorum_con_features(), the same
+///                  fallback send_incremental() applies when a session has
+///                  no con_features (anonymous connection).
+/// @return message allocated with new and returned as a raw pointer;
+///         oldest_map / newest_map are filled in from the committed range.
+MOSDMap *OSDMonitor::build_latest_full(uint64_t features)
{
+  // Callers pass session->con_features straight through.  Without this
+  // fallback a zero mask would reach get_version_full() and select the
+  // most minimal encoding instead of the quorum's.
+  const uint64_t encode_features =
+    features ? features : mon->get_quorum_con_features();
MOSDMap *r = new MOSDMap(mon->monmap->fsid);
- get_version_full(osdmap.get_epoch(), r->maps[osdmap.get_epoch()]);
+  get_version_full(osdmap.get_epoch(), encode_features,
+                   r->maps[osdmap.get_epoch()]);
r->oldest_map = get_first_committed();
r->newest_map = osdmap.get_epoch();
return r;
}
-MOSDMap *OSDMonitor::build_incremental(epoch_t from, epoch_t to)
+MOSDMap *OSDMonitor::build_incremental(epoch_t from, epoch_t to, uint64_t features)
{
- dout(10) << "build_incremental [" << from << ".." << to << "]" << dendl;
+ dout(10) << "build_incremental [" << from << ".." << to << "] with features " << std::hex << features << dendl;
MOSDMap *m = new MOSDMap(mon->monmap->fsid);
m->oldest_map = get_first_committed();
m->newest_map = osdmap.get_epoch();
for (epoch_t e = to; e >= from && e > 0; e--) {
bufferlist bl;
- int err = get_version(e, bl);
+ int err = get_version(e, features, bl);
if (err == 0) {
assert(bl.length());
// if (get_version(e, bl) > 0) {
} else {
assert(err == -ENOENT);
assert(!bl.length());
- get_version_full(e, bl);
+ get_version_full(e, features, bl);
if (bl.length() > 0) {
//else if (get_version("full", e, bl) > 0) {
dout(20) << "build_incremental full " << e << " "
{
op->mark_osdmon_event(__func__);
dout(5) << "send_full to " << op->get_req()->get_orig_source_inst() << dendl;
- mon->send_reply(op, build_latest_full());
+ mon->send_reply(op, build_latest_full(op->get_session()->con_features));
}
void OSDMonitor::send_incremental(MonOpRequestRef op, epoch_t first)
dout(5) << "send_incremental [" << first << ".." << osdmap.get_epoch() << "]"
<< " to " << session->inst << dendl;
+ // get the features of the peer;
+ // use quorum_con_features if it's an anonymous connection.
+ uint64_t features = session->con_features ? session->con_features :
+ mon->get_quorum_con_features();
+
if (first <= session->osd_epoch) {
dout(10) << __func__ << " " << session->inst << " should already have epoch "
<< session->osd_epoch << dendl;
first = get_first_committed();
bufferlist bl;
- int err = get_version_full(first, bl);
+ int err = get_version_full(first, features, bl);
assert(err == 0);
assert(bl.length());
dout(20) << "send_incremental starting with base full "
while (first <= osdmap.get_epoch()) {
epoch_t last = std::min<epoch_t>(first + g_conf->osd_map_message_max - 1,
osdmap.get_epoch());
- MOSDMap *m = build_incremental(first, last);
+ MOSDMap *m = build_incremental(first, last, features);
if (req) {
// send some maps. it may not be all of them, but it will get them
int OSDMonitor::get_version(version_t ver, bufferlist& bl)
{
- if (inc_osd_cache.lookup(ver, &bl)) {
- return 0;
- }
- int ret = PaxosService::get_version(ver, bl);
- if (!ret) {
- inc_osd_cache.add(ver, bl);
- }
+  // Feature-less overload: serve the incremental in the encoding that
+  // corresponds to the current mon quorum's connection features by
+  // delegating to the feature-aware overload.
+  const uint64_t quorum_features = mon->get_quorum_con_features();
+  return get_version(ver, quorum_features, bl);
+}
+
+/// Re-encode, in place, the incremental osdmap held in @p bl.
+///
+/// The incremental is decoded, @p bl is emptied, and the incremental is
+/// encoded back into it using only the bits of @p features that are also set
+/// in the incremental's own encode_features.  An embedded full map or crush
+/// map, when present, is decoded and re-encoded with that same mask first.
+///
+/// @param bl        in: encoded incremental; out: the re-encoded incremental
+/// @param features  feature bits the result should be limited to
+void OSDMonitor::reencode_incremental_map(bufferlist& bl, uint64_t features)
+{
+  OSDMap::Incremental incremental;
+  auto it = bl.begin();
+  incremental.decode(it);
+
+  // never encode with more than the incremental's canonical features
+  const uint64_t mask = features & incremental.encode_features;
+  dout(20) << __func__ << " " << incremental.epoch << " with features "
+           << mask << dendl;
+  bl.clear();
+
+  // the maps and the incremental itself are encoded with the reserved bit
+  // set on top of the mask; the crush map below gets the bare mask
+  const uint64_t map_features = mask | CEPH_FEATURE_RESERVED;
+
+  if (incremental.fullmap.length()) {
+    // the incremental carries a complete map: convert that as well
+    OSDMap embedded;
+    embedded.decode(incremental.fullmap);
+    incremental.fullmap.clear();
+    embedded.encode(incremental.fullmap, map_features);
+  }
+
+  if (incremental.crush.length()) {
+    // the incremental carries a crush map: convert that as well
+    CrushWrapper crush;
+    auto crush_it = incremental.crush.begin();
+    crush.decode(crush_it);
+    incremental.crush.clear();
+    crush.encode(incremental.crush, mask);
+  }
+
+  incremental.encode(bl, map_features);
+}
+
+/// Re-encode, in place, the full osdmap held in @p bl.
+///
+/// The map is decoded, @p bl is emptied, and the map is encoded back into it
+/// using only the bits of @p features that the map itself reports through
+/// get_encoding_features(), with CEPH_FEATURE_RESERVED added.
+///
+/// @param bl        in: encoded full map; out: the re-encoded full map
+/// @param features  feature bits the result should be limited to
+void OSDMonitor::reencode_full_map(bufferlist& bl, uint64_t features)
+{
+  OSDMap map;
+  auto it = bl.begin();
+  map.decode(it);
+
+  // never encode with more than the map's canonical features
+  const uint64_t mask = features & map.get_encoding_features();
+  dout(20) << __func__ << " " << map.get_epoch() << " with features "
+           << mask << dendl;
+
+  bl.clear();
+  map.encode(bl, mask | CEPH_FEATURE_RESERVED);
+}
+
+/// Fetch the encoded incremental for @p ver in an encoding suited to
+/// @p features.
+///
+/// Results are cached in inc_osd_cache under (version, significant features).
+/// On a miss the stored blob is read through PaxosService::get_version() and,
+/// if the requested significant features differ from those of the mon
+/// quorum, re-encoded with reencode_incremental_map() before being cached.
+///
+/// @param ver       version to fetch
+/// @param features  feature bits of the consumer
+/// @param bl        receives the encoded incremental
+/// @return 0 on success, or the negative value returned by
+///         PaxosService::get_version()
+int OSDMonitor::get_version(version_t ver, uint64_t features, bufferlist& bl)
+{
+  const uint64_t wanted = OSDMap::get_significant_features(features);
+  const bool cached = inc_osd_cache.lookup({ver, wanted}, &bl);
+  if (cached) {
+    return 0;
+  }
+
+  const int ret = PaxosService::get_version(ver, bl);
+  if (ret < 0) {
return ret;
+  }
+
+  // NOTE: this comparison is imprecise.  The OSDMap encoding features may
+  // be a subset of the latest mon quorum features; in the worst case we
+  // reencode once and then cache the (identical) result under both
+  // feature masks.
+  const uint64_t quorum_significant =
+    OSDMap::get_significant_features(mon->get_quorum_con_features());
+  if (wanted != quorum_significant) {
+    reencode_incremental_map(bl, features);
+  }
+
+  inc_osd_cache.add({ver, wanted}, bl);
+  return 0;
}
int OSDMonitor::get_inc(version_t ver, OSDMap::Incremental& inc)
bufferlist osdm_bl;
bool has_cached_osdmap = false;
for (version_t v = ver-1; v >= closest_pinned; --v) {
- if (full_osd_cache.lookup(v, &osdm_bl)) {
+ if (full_osd_cache.lookup({v, mon->get_quorum_con_features()},
+ &osdm_bl)) {
dout(10) << __func__ << " found map in cache ver " << v << dendl;
closest_pinned = v;
has_cached_osdmap = true;
int OSDMonitor::get_version_full(version_t ver, bufferlist& bl)
{
- if (full_osd_cache.lookup(ver, &bl)) {
- return 0;
- }
- int ret = PaxosService::get_version_full(ver, bl);
- if (ret == -ENOENT) {
- // build map?
- ret = get_full_from_pinned_map(ver, bl);
- }
- if (ret != 0) {
- return ret;
- }
+  // Feature-less overload: serve the full map in the encoding that
+  // corresponds to the current mon quorum's connection features by
+  // delegating to the feature-aware overload.
+  const uint64_t quorum_features = mon->get_quorum_con_features();
+  return get_version_full(ver, quorum_features, bl);
+}
- full_osd_cache.add(ver, bl);
+/// Fetch the encoded full map for @p ver in an encoding suited to
+/// @p features.
+///
+/// Results are cached in full_osd_cache under (version, significant
+/// features).  On a miss the blob is read through
+/// PaxosService::get_version_full(); if that reports -ENOENT,
+/// get_full_from_pinned_map() is tried instead.  The blob is re-encoded with
+/// reencode_full_map() when the requested significant features differ from
+/// those of the mon quorum, and then cached.
+///
+/// @param ver       version to fetch
+/// @param features  feature bits of the consumer
+/// @param bl        receives the encoded full map
+/// @return 0 on success, or the negative value reported by the lookup
+int OSDMonitor::get_version_full(version_t ver, uint64_t features,
+                                 bufferlist& bl)
+{
+  const uint64_t wanted = OSDMap::get_significant_features(features);
+  const bool cached = full_osd_cache.lookup({ver, wanted}, &bl);
+  if (cached) {
return 0;
+  }
+
+  int ret = PaxosService::get_version_full(ver, bl);
+  if (ret == -ENOENT) {
+    // no stored full map for this version: fall back to
+    // get_full_from_pinned_map()
+    ret = get_full_from_pinned_map(ver, bl);
+  }
+  if (ret < 0) {
+    return ret;
+  }
+
+  // NOTE: this comparison is imprecise.  The OSDMap encoding features may
+  // be a subset of the latest mon quorum features; in the worst case we
+  // reencode once and then cache the (identical) result under both
+  // feature masks.
+  const uint64_t quorum_significant =
+    OSDMap::get_significant_features(mon->get_quorum_con_features());
+  if (wanted != quorum_significant) {
+    reencode_full_map(bl, features);
+  }
+
+  full_osd_cache.add({ver, wanted}, bl);
+  return 0;
}
epoch_t OSDMonitor::blacklist(const entity_addr_t& a, utime_t until)
if (sub->next >= 1)
send_incremental(sub->next, sub->session, sub->incremental_onetime);
else
- sub->session->con->send_message(build_latest_full());
+ sub->session->con->send_message(build_latest_full(sub->session->con_features));
if (sub->onetime)
mon->session_map.remove_sub(sub);
else
#include "erasure-code/ErasureCodeInterface.h"
#include "mon/MonOpRequest.h"
+#include <boost/functional/hash.hpp>
+// re-include our assert to clobber the system one; fix dout:
+#include "include/assert.h"
/// information about a particular peer's failure reports for one osd
struct failure_reporter_t {
map<int,double> osd_weight;
- SimpleLRU<version_t, bufferlist> inc_osd_cache;
- SimpleLRU<version_t, bufferlist> full_osd_cache;
+ using osdmap_key_t = std::pair<version_t, uint64_t>;
+ using osdmap_cache_t = SimpleLRU<osdmap_key_t,
+ bufferlist,
+ std::less<osdmap_key_t>,
+ boost::hash<osdmap_key_t>>;
+ osdmap_cache_t inc_osd_cache;
+ osdmap_cache_t full_osd_cache;
bool has_osdmap_manifest;
osdmap_manifest_t osdmap_manifest;
bool can_mark_in(int o);
// ...
- MOSDMap *build_latest_full();
- MOSDMap *build_incremental(epoch_t first, epoch_t last);
+ MOSDMap *build_latest_full(uint64_t features);
+ MOSDMap *build_incremental(epoch_t first, epoch_t last, uint64_t features);
void send_full(MonOpRequestRef op);
void send_incremental(MonOpRequestRef op, epoch_t first);
public:
int load_metadata(int osd, map<string, string>& m, ostream *err);
void count_metadata(const string& field, Formatter *f);
+
+ void reencode_incremental_map(bufferlist& bl, uint64_t features);
+ void reencode_full_map(bufferlist& bl, uint64_t features);
public:
void count_metadata(const string& field, map<string,int> *out);
protected:
mempool::osdmap::map<int64_t,OSDMap::snap_interval_set_t> *gap_removed_snaps);
int get_version(version_t ver, bufferlist& bl) override;
+ int get_version(version_t ver, uint64_t feature, bufferlist& bl);
+
+ int get_version_full(version_t ver, uint64_t feature, bufferlist& bl);
int get_version_full(version_t ver, bufferlist& bl) override;
int get_inc(version_t ver, OSDMap::Incremental& inc);
int get_full_from_pinned_map(version_t ver, bufferlist& bl);