Add a built-in safety margin that uses max(mon_osd_report_timeout, 2*osd_beacon_report_interval)
as the OSD timeout, by adding osd_beacon_report_interval to MOSDBeacon. This allows
an OSD to miss one beacon and catch up on the next beacon without being marked down
by the monitor.
Fixes: https://tracker.ceph.com/issues/40668
Signed-off-by: luo rixin <luorixin@huawei.com>
epoch_t min_last_epoch_clean = osdmap->get_epoch();
auto m = make_message<MOSDBeacon>(osdmap->get_epoch(),
min_last_epoch_clean,
- superblock.last_purged_snaps_scrub);
+ superblock.last_purged_snaps_scrub,
+ local_conf()->osd_beacon_report_interval);
return monc->send_message(m);
}
class MOSDBeacon : public PaxosServiceMessage {
private:
- static constexpr int HEAD_VERSION = 2;
+ static constexpr int HEAD_VERSION = 3;
static constexpr int COMPAT_VERSION = 1;
public:
std::vector<pg_t> pgs;
epoch_t min_last_epoch_clean = 0;
utime_t last_purged_snaps_scrub;
+ int osd_beacon_report_interval = 0;
MOSDBeacon()
: PaxosServiceMessage{MSG_OSD_BEACON, 0,
HEAD_VERSION, COMPAT_VERSION}
{}
- MOSDBeacon(epoch_t e, epoch_t min_lec, utime_t ls)
+ MOSDBeacon(epoch_t e, epoch_t min_lec, utime_t ls, int interval)
: PaxosServiceMessage{MSG_OSD_BEACON, e,
HEAD_VERSION, COMPAT_VERSION},
min_last_epoch_clean(min_lec),
- last_purged_snaps_scrub(ls)
+ last_purged_snaps_scrub(ls),
+ osd_beacon_report_interval(interval)
{}
void encode_payload(uint64_t features) override {
using ceph::encode;
encode(pgs, payload);
encode(min_last_epoch_clean, payload);
encode(last_purged_snaps_scrub, payload);
+ encode(osd_beacon_report_interval, payload);
}
void decode_payload() override {
auto p = payload.cbegin();
if (header.version >= 2) {
decode(last_purged_snaps_scrub, p);
}
+ if (header.version >= 3) {
+ decode(osd_beacon_report_interval, p);
+ } else {
+ osd_beacon_report_interval = 0;
+ }
}
std::string_view get_type_name() const override { return "osd_beacon"; }
void print(std::ostream &out) const {
out << get_type_name()
<< "(pgs " << pgs
<< " lec " << min_last_epoch_clean
- << " last_purged_snaps_scrub " << last_purged_snaps_scrub
+ << " last_purged_snaps_scrub " << last_purged_snaps_scrub
+ << " osd_beacon_report_interval " << osd_beacon_report_interval
<< " v" << version << ")";
}
private:
return false;
}
- last_osd_report[from] = ceph_clock_now();
+ last_osd_report[from].first = ceph_clock_now();
+ last_osd_report[from].second = beacon->osd_beacon_report_interval;
osd_epochs[from] = beacon->version;
for (const auto& pg : beacon->pgs) {
}
bool OSDMonitor::handle_osd_timeouts(const utime_t &now,
- std::map<int,utime_t> &last_osd_report)
+ std::map<int, std::pair<utime_t, int>> &last_osd_report)
{
utime_t timeo(g_conf()->mon_osd_report_timeout, 0);
if (now - mon->get_leader_since() < timeo) {
}
if (!osdmap.is_up(i))
continue;
- const std::map<int,utime_t>::const_iterator t = last_osd_report.find(i);
+ const std::map<int, std::pair<utime_t, int>>::const_iterator t = last_osd_report.find(i);
if (t == last_osd_report.end()) {
// it wasn't in the map; start the timer.
- last_osd_report[i] = now;
+ last_osd_report[i].first = now;
+ last_osd_report[i].second = 0;
} else if (can_mark_down(i)) {
- utime_t diff = now - t->second;
- if (diff > timeo) {
- mon->clog->info() << "osd." << i << " marked down after no beacon for "
- << diff << " seconds";
- derr << "no beacon from osd." << i << " since " << t->second
- << ", " << diff << " seconds ago. marking down" << dendl;
- pending_inc.new_state[i] = CEPH_OSD_UP;
- new_down = true;
+ utime_t diff = now - t->second.first;
+ // we use the max(mon_osd_report_timeout, 2*osd_beacon_report_interval) as timeout
+ // to allow for the osd to miss a beacon.
+ int mon_osd_report_timeout = g_conf()->mon_osd_report_timeout;
+ utime_t max_timeout(std::max(mon_osd_report_timeout, 2 * t->second.second), 0);
+ if (diff > max_timeout) {
+ mon->clog->info() << "osd." << i << " marked down after no beacon for "
+ << diff << " seconds";
+ derr << "no beacon from osd." << i << " since " << t->second.first
+ << ", " << diff << " seconds ago. marking down" << dendl;
+ pending_inc.new_state[i] = CEPH_OSD_UP;
+ new_down = true;
}
}
}
#include <map>
#include <set>
+#include <utility>
#include "include/types.h"
#include "include/encoding.h"
bool is_pool_currently_all_bluestore(int64_t pool_id, const pg_pool_t &pool,
std::ostream *err);
- // when we last received PG stats from each osd
- std::map<int,utime_t> last_osd_report;
+ // when we last received PG stats from each osd and the osd's osd_beacon_report_interval
+ std::map<int, std::pair<utime_t, int>> last_osd_report;
// TODO: use last_osd_report to store the osd report epochs, once we don't
// need to upgrade from pre-luminous releases.
std::map<int,epoch_t> osd_epochs;
bool preparing);
bool handle_osd_timeouts(const utime_t &now,
- std::map<int,utime_t> &last_osd_report);
+ std::map<int, std::pair<utime_t, int>> &last_osd_report);
void send_latest(MonOpRequestRef op, epoch_t start=0);
void send_latest_now_nodelete(MonOpRequestRef op, epoch_t start=0) {
std::lock_guard l{min_last_epoch_clean_lock};
beacon = new MOSDBeacon(get_osdmap_epoch(),
min_last_epoch_clean,
- superblock.last_purged_snaps_scrub);
+ superblock.last_purged_snaps_scrub,
+ cct->_conf->osd_beacon_report_interval);
beacon->pgs = min_last_epoch_clean_pgs;
last_sent_beacon = now;
}