"daemons remain in the OSD map")
.set_flag(Option::FLAG_RUNTIME),
+ Option("mon_mgr_blacklist_interval", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+ .set_default(1_day)
+ .set_min(1_hr)
+ .add_service("mon")
+ .set_description("Duration in seconds that blacklist entries for mgr "
+ "daemons remain in the OSD map")
+ .set_flag(Option::FLAG_RUNTIME),
+
Option("mon_osd_crush_smoke_test", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.add_service("mon")
register_async_signal_handler_oneshot(SIGINT, handle_mgr_signal);
register_async_signal_handler_oneshot(SIGTERM, handle_mgr_signal);
- // Start communicating with daemons to learn statistics etc
- int r = server.init(monc->get_global_id(), client_messenger->get_myaddrs());
- if (r < 0) {
- derr << "Initialize server fail: " << cpp_strerror(r) << dendl;
- // This is typically due to a bind() failure, so let's let
- // systemd restart us.
- exit(1);
- }
- dout(4) << "Initialized server at " << server.get_myaddrs() << dendl;
-
- // Preload all daemon metadata (will subsequently keep this
- // up to date by watching maps, so do the initial load before
- // we subscribe to any maps)
- dout(4) << "Loading daemon metadata..." << dendl;
- load_all_metadata();
-
// subscribe to all the maps
monc->sub_want("log-info", 0, 0);
monc->sub_want("mgrdigest", 0, 0);
// Start Objecter and wait for OSD map
lock.unlock(); // Drop lock because OSDMap dispatch calls into my ms_dispatch
- objecter->wait_for_osd_map();
+ epoch_t e;
+ cluster_state.with_mgrmap([&e](const MgrMap& m) {
+ e = m.last_failure_osd_epoch;
+ });
+ /* wait for any blacklists to be applied to previous mgr instance */
+ dout(4) << "Waiting for new OSDMap (e=" << e
+ << ") that may blacklist prior active." << dendl;
+ objecter->wait_for_osd_map(e);
lock.lock();
+ // Start communicating with daemons to learn statistics etc
+ int r = server.init(monc->get_global_id(), client_messenger->get_myaddrs());
+ if (r < 0) {
+ derr << "Initialize server fail: " << cpp_strerror(r) << dendl;
+ // This is typically due to a bind() failure, so let's let
+ // systemd restart us.
+ exit(1);
+ }
+ dout(4) << "Initialized server at " << server.get_myaddrs() << dendl;
+
+ // Preload all daemon metadata. It is subsequently kept up to date
+ // by watching maps; note that the sub_want() calls above have
+ // already been issued by the time this initial load runs.
+ dout(4) << "Loading daemon metadata..." << dendl;
+ load_all_metadata();
+
// Populate PGs in ClusterState
cluster_state.with_osdmap_and_pgmap([this](const OSDMap &osd_map,
const PGMap& pg_map) {
};
epoch_t epoch = 0;
+ epoch_t last_failure_osd_epoch = 0;
/// global_id of the ceph-mgr instance selected as a leader
uint64_t active_gid = 0;
std::map<std::string, std::string> services;
epoch_t get_epoch() const { return epoch; }
+ epoch_t get_last_failure_osd_epoch() const { return last_failure_osd_epoch; }
entity_addrvec_t get_active_addrs() const { return active_addrs; }
uint64_t get_active_gid() const { return active_gid; }
bool get_available() const { return available; }
ENCODE_FINISH(bl);
return;
}
- ENCODE_START(9, 6, bl);
+ ENCODE_START(10, 6, bl);
encode(epoch, bl);
encode(active_addrs, bl, features);
encode(active_gid, bl);
encode(active_change, bl);
encode(always_on_modules, bl);
encode(active_mgr_features, bl);
+ encode(last_failure_osd_epoch, bl);
ENCODE_FINISH(bl);
return;
}
if (struct_v >= 9) {
decode(active_mgr_features, p);
}
+ if (struct_v >= 10) {
+ decode(last_failure_osd_epoch, p);
+ }
DECODE_FINISH(p);
}
}
f->close_section();
}
+ f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch);
f->close_section();
}
dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl;
mon->clog->info() << "Active manager daemon " << m->get_name()
<< " restarted";
+ if (!mon->osdmon()->is_writeable()) {
+ dout(1) << __func__ << ": waiting for osdmon writeable to"
+ " blacklist old instance." << dendl;
+ mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+ return false;
+ }
drop_active();
}
}
if (pending_map.active_gid != 0
- && last_beacon.at(pending_map.active_gid) < cutoff) {
+ && last_beacon.at(pending_map.active_gid) < cutoff
+ && mon->osdmon()->is_writeable()) {
const std::string old_active_name = pending_map.active_name;
drop_active();
propose = true;
void MgrMonitor::drop_active()
{
+ ceph_assert(mon->osdmon()->is_writeable());
+
if (last_beacon.count(pending_map.active_gid) > 0) {
last_beacon.erase(pending_map.active_gid);
}
+ ceph_assert(pending_map.active_gid > 0);
+ auto until = ceph_clock_now();
+ until += g_conf().get_val<double>("mon_mgr_blacklist_interval");
+ dout(5) << "blacklisting previous mgr." << pending_map.active_name << "."
+ << pending_map.active_gid << " ("
+ << pending_map.active_addrs << ")" << dendl;
+ auto blacklist_epoch = mon->osdmon()->blacklist(pending_map.active_addrs, until);
+ request_proposal(mon->osdmon());
+
pending_metadata_rm.insert(pending_map.active_name);
pending_metadata.erase(pending_map.active_name);
pending_map.active_name = "";
pending_map.available = false;
pending_map.active_addrs = entity_addrvec_t();
pending_map.services.clear();
+ pending_map.last_failure_osd_epoch = blacklist_epoch;
// So that when new active mgr subscribes to mgrdigest, it will
// get an immediate response instead of waiting for next timer
if (!err.empty()) {
// Does not parse as a gid, treat it as a name
if (pending_map.active_name == who) {
+ if (!mon->osdmon()->is_writeable()) {
+ mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+ return false;
+ }
drop_active();
changed = true;
} else {
}
} else {
if (pending_map.active_gid == gid) {
+ if (!mon->osdmon()->is_writeable()) {
+ mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+ return false;
+ }
drop_active();
changed = true;
} else if (pending_map.standbys.count(gid) > 0) {
logger->set(l_osdc_osd_sessions, osd_sessions.size());
}
-void Objecter::wait_for_osd_map()
+/**
+ * Block the calling thread until the Objecter holds an OSD map whose
+ * epoch is non-zero and at least @p e.
+ *
+ * @param e minimum OSD map epoch to wait for. Passing 0 keeps the
+ *          pre-existing behaviour of waiting for the first OSD map of
+ *          any epoch.
+ */
+void Objecter::wait_for_osd_map(epoch_t e)
{
unique_lock l(rwlock);
- if (osdmap->get_epoch()) {
+ // The previous check treated epoch 0 as "no map yet". A bare ">= e"
+ // comparison would return immediately for e == 0 even without a map,
+ // so keep requiring a non-zero epoch before taking the fast path.
+ if (osdmap->get_epoch() && osdmap->get_epoch() >= e) {
l.unlock();
return;
}
bool done;
std::unique_lock mlock{lock};
C_SafeCond *context = new C_SafeCond(lock, cond, &done, NULL);
- waiting_for_map[0].push_back(pair<Context*, int>(context, 0));
+ // Queue the waiter under the requested epoch while rwlock is still held.
+ waiting_for_map[e].push_back(pair<Context*, int>(context, 0));
l.unlock();
cond.wait(mlock, [&done] { return done; });
}
void handle_osd_backoff(class MOSDBackoff *m);
void handle_watch_notify(class MWatchNotify *m);
void handle_osd_map(class MOSDMap *m);
- void wait_for_osd_map();
+ void wait_for_osd_map(epoch_t e=0);
/**
* Get std::list of entities blacklisted since this was last called,