sync_full(false),
sync_start_version(0),
sync_timeout_event(NULL),
+ sync_last_committed_floor(0),
timecheck_round(0),
timecheck_acks(0),
}
}
+ sync_last_committed_floor = store->get("mon_sync", "last_committed_floor");
+ dout(10) << "sync_last_committed_floor " << sync_last_committed_floor << dendl;
+
init_paxos();
health_monitor->init();
sync_obtain_latest_monmap(backup_monmap);
assert(backup_monmap.length() > 0);
+ sync_last_committed_floor = MAX(sync_last_committed_floor, paxos->get_version());
+ dout(10) << __func__ << " marking sync in progress, storing sync_last_commited_floor "
+ << sync_last_committed_floor << dendl;
+
t.put("mon_sync", "latest_monmap", backup_monmap);
t.put("mon_sync", "in_sync", 1);
+ t.put("mon_sync", "last_committed_floor", sync_last_committed_floor);
store->apply_transaction(t);
assert(g_conf->mon_sync_requester_kill_at != 1);
MonitorDBStore::Transaction t;
t.erase("mon_sync", "in_sync");
t.erase("mon_sync", "force_sync");
+ t.erase("mon_sync", "last_committed_floor");
store->apply_transaction(t);
sync_reset();
entity_inst_t other = m->get_source_inst();
- if (paxos->get_version() < m->paxos_first_version &&
- m->paxos_first_version > 1) { // no need to sync if we're 0 and they start at 1.
+ if (m->paxos_last_version < sync_last_committed_floor) {
dout(10) << " peer paxos versions [" << m->paxos_first_version
- << "," << m->paxos_last_version << "]"
- << " vs my version " << paxos->get_version()
- << " (too far ahead)"
- << dendl;
- cancel_probe_timeout();
- sync_start(other, true);
- m->put();
- return;
- }
- if (paxos->get_version() + g_conf->paxos_max_join_drift < m->paxos_last_version) {
- dout(10) << " peer paxos version " << m->paxos_last_version
- << " vs my version " << paxos->get_version()
- << " (too far ahead)"
+ << "," << m->paxos_last_version << "] < my sync_last_committed_floor "
+ << sync_last_committed_floor << ", ignoring"
<< dendl;
- cancel_probe_timeout();
- sync_start(other, false);
- m->put();
- return;
+ } else {
+ if (paxos->get_version() < m->paxos_first_version &&
+ m->paxos_first_version > 1) { // no need to sync if we're 0 and they start at 1.
+ dout(10) << " peer paxos versions [" << m->paxos_first_version
+ << "," << m->paxos_last_version << "]"
+ << " vs my version " << paxos->get_version()
+ << " (too far ahead)"
+ << dendl;
+ cancel_probe_timeout();
+ sync_start(other, true);
+ m->put();
+ return;
+ }
+ if (paxos->get_version() + g_conf->paxos_max_join_drift < m->paxos_last_version) {
+ dout(10) << " peer paxos version " << m->paxos_last_version
+ << " vs my version " << paxos->get_version()
+ << " (too far ahead)"
+ << dendl;
+ cancel_probe_timeout();
+ sync_start(other, false);
+ m->put();
+ return;
+ }
}
// is there an existing quorum?
version_t sync_start_version; ///< last_committed at sync start
Context *sync_timeout_event; ///< timeout event
+ /**
+ * floor for sync source
+ *
+ * When we sync we forget about our old last_committed value, which
+ * can be dangerous. For example, if we have a cluster of:
+ *
+ * mon.a: lc 100
+ * mon.b: lc 80
+ * mon.c: lc 100 (us)
+ *
+ * If something forces us to sync (say, corruption, manual
+ * intervention, or a bug), we forget last_committed, and the sync might abort.
+ * If mon.a happens to be down when we come back, we will see:
+ *
+ * mon.b: lc 80
+ * mon.c: lc 0 (us)
+ *
+ * and sync from mon.b, at which point b+c (us) will both have lc 80 and
+ * come online with a majority holding out-of-date commits.
+ *
+ * Avoid this by preserving our old last_committed value prior to
+ * sync and never going backwards.
+ */
+ version_t sync_last_committed_floor;
+
struct C_SyncTimeout : public Context {
Monitor *mon;
C_SyncTimeout(Monitor *m) : mon(m) {}