OPTION(osd_heartbeat_grace, OPT_INT, 20) // NOTE(review): unit not visible here (presumably seconds) -- confirm
OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers
OPTION(osd_heartbeat_use_min_delay_socket, OPT_BOOL, false) // if true, prioritize the heartbeat TCP socket and set its DSCP to CS6
+OPTION(osd_heartbeat_min_size, OPT_INT, 2000) // minimum size of the OSD heartbeat messages to send; passed as min_message to the MOSDPing constructor
// maximum number of parallel snap trims per PG
OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2)
*
*/
+
+/**
+ * This is used to send pings between daemons (so far, the OSDs) for
+ * heartbeat purposes. We include a timestamp and distinguish between
+ * outgoing pings and responses to those. If a min_message size is
+ * passed to the constructor, the encoded message is padded up to at
+ * least that size -- this helps when dealing with network issues
+ * involving jumbo frames. See http://tracker.ceph.com/issues/20087
+ *
+ */
+
#ifndef CEPH_MOSDPING_H
#define CEPH_MOSDPING_H
class MOSDPing : public Message {
- static const int HEAD_VERSION = 2;
+ static const int HEAD_VERSION = 3; // v3 appends the padding blob that decode_payload() reads when header.version >= 3
static const int COMPAT_VERSION = 2;
public:
__u8 op; // ping type, e.g. PING, PING_REPLY or YOU_DIED
osd_peer_stat_t peer_stat;
utime_t stamp; // timestamp carried by the ping; replies are built with the stamp of the ping they answer
+ uint32_t min_message_size; // minimum size encode_payload() pads the message to
- MOSDPing(const uuid_d& f, epoch_t e, __u8 o, utime_t s)
+ MOSDPing(const uuid_d& f, epoch_t e, __u8 o, utime_t s, uint32_t min_message) // f: fsid, e: map epoch, o: op, s: stamp, min_message: minimum padded size
: Message(MSG_OSD_PING, HEAD_VERSION, COMPAT_VERSION),
- fsid(f), map_epoch(e), peer_as_of_epoch(0), op(o), stamp(s)
+ fsid(f), map_epoch(e), peer_as_of_epoch(0), op(o), stamp(s),
+ min_message_size(min_message) // encode_payload() pads the message up to this size
{ }
MOSDPing()
- : Message(MSG_OSD_PING, HEAD_VERSION, COMPAT_VERSION)
+ : Message(MSG_OSD_PING, HEAD_VERSION, COMPAT_VERSION), min_message_size(0) // decode_payload() overwrites this for messages with header.version >= 3
{}
private:
~MOSDPing() override {} // NOTE(review): private destructor, so outside code cannot delete instances directly -- presumably they are released through Message's reference counting; confirm
::decode(op, p);
::decode(peer_stat, p);
::decode(stamp, p);
+ if (header.version >= 3) {
+ bufferlist size_bl;
+ int payload_mid_length = p.get_off();
+ ::decode(size_bl, p);
+ min_message_size = size_bl.length() + payload_mid_length;
+ }
}
void encode_payload(uint64_t features) override {
::encode(fsid, payload);
::encode(op, payload);
::encode(peer_stat, payload);
::encode(stamp, payload);
+ bufferptr size_bp(MAX(min_message_size - payload.length(), 0));
+ bufferlist size_bl;;
+ size_bl.push_back(size_bp);
+ ::encode(size_bl, payload);
}
const char *get_type_name() const override { return "osd_ping"; } // fixed type-name string for this message class
Message *r = new MOSDPing(monc->get_fsid(),
curmap->get_epoch(),
- MOSDPing::PING_REPLY,
- m->stamp);
+ MOSDPing::PING_REPLY, m->stamp,
+ cct->_conf->osd_heartbeat_min_size);
m->get_connection()->send_message(r);
if (curmap->is_up(from)) {
Message *r = new MOSDPing(monc->get_fsid(),
curmap->get_epoch(),
MOSDPing::YOU_DIED,
- m->stamp);
+ m->stamp,
+ cct->_conf->osd_heartbeat_min_size);
m->get_connection()->send_message(r);
}
}
dout(30) << "heartbeat sending ping to osd." << peer << dendl;
i->second.con_back->send_message(new MOSDPing(monc->get_fsid(),
service.get_osdmap()->get_epoch(),
- MOSDPing::PING,
- now));
+ MOSDPing::PING, now,
+ cct->_conf->osd_heartbeat_min_size));
if (i->second.con_front)
i->second.con_front->send_message(new MOSDPing(monc->get_fsid(),
service.get_osdmap()->get_epoch(),
- MOSDPing::PING,
- now));
+ MOSDPing::PING, now,
+ cct->_conf->osd_heartbeat_min_size));
}
logger->set(l_osd_hb_to, heartbeat_peers.size());