{
switch (qs) {
case QS__INVALID:
- return os << "QS__INVALID (" << (int)qs << ")";
+ return os << "QS__INVALID";
case QS_QUIESCING:
- return os << "QS_QUIESCING (" << (int)qs << ")";
+ return os << "QS_QUIESCING";
case QS_QUIESCED:
- return os << "QS_QUIESCED (" << (int)qs << ")";
+ return os << "QS_QUIESCED";
case QS_RELEASING:
- return os << "QS_RELEASING (" << (int)qs << ")";
+ return os << "QS_RELEASING";
case QS_RELEASED:
- return os << "QS_RELEASED (" << (int)qs << ")";
+ return os << "QS_RELEASED";
case QS_FAILED:
- return os << "QS_FAILED (" << (int)qs << ")";
+ return os << "QS_FAILED";
case QS_CANCELED:
- return os << "QS_CANCELED (" << (int)qs << ")";
+ return os << "QS_CANCELED";
case QS_TIMEDOUT:
- return os << "QS_TIMEDOUT (" << (int)qs << ")";
+ return os << "QS_TIMEDOUT";
case QS_EXPIRED:
- return os << "QS_EXPIRED (" << (int)qs << ")";
+ return os << "QS_EXPIRED";
default:
return os << "!Unknown quiesce state! (" << (int)qs << ")";
}
auto it = peers.find(from);
if (it == peers.end()) {
+ dout(5) << "unknown peer " << from << dendl;
// ignore updates from unknown peers
return;
}
auto & info = it->second;
if (diff_map.db_version > db_version()) {
- dout(3) << "ignoring unknown version ack by rank " << from << " (" << diff_map.db_version << " > " << db_version() << ")" << dendl;
- dout(5) << "will send the peer a full DB" << dendl;
- info.diff_map.clear();
+ dout(15) << "future version ack by peer " << from << " (" << diff_map.db_version << " > " << db_version() << ")" << dendl;
+ if (diff_map.db_version.epoch > db_version().epoch && diff_map.db_version.set_version <= db_version().set_version) {
+ dout(15) << "my epoch is behind, ignoring this until my membership is updated" << dendl;
+ } else {
+ dout(5) << "will send the peer a full DB" << dendl;
+ info.diff_map.clear();
+ }
} else {
+ dout(20) << "ack " << diff_map << " from peer " << from << dendl;
info.diff_map = std::move(diff_map);
info.last_seen = QuiesceClock::now();
}
max_reported_state = QS__INVALID;
size_t up_to_date_peers = 0;
+ std::multimap<QuiesceState, std::pair<QuiesceInterface::PeerId, QuiesceDbVersion>> reporting_peers;
for (auto& [peer, info] : peers) {
// we consider the last bit of information we had from a given peer
continue;
}
reported_state = pr_state.state;
+ reporting_peers.insert({pr_state.state, {peer, info.diff_map.db_version}});
}
// but we only consider the peer up to date given the version
}
if (min_reported_state == QS__MAX) {
+ // this means that we had 0 eligible peer reports
min_reported_state = set.get_requested_member_state();
max_reported_state = set.get_requested_member_state();
}
+ dout(20) << dsetroot("")
+ << "up_to_date_peers: " << up_to_date_peers
+ << " min_reported_state: " << min_reported_state
+ << " max_reported_state: " << max_reported_state
+ << " peer_acks: " << reporting_peers
+ << dendl;
+
return up_to_date_peers;
}
+ // Test-only accessors: expose the manager's internals (DB, queued
+ // requests, peer map) so unit tests can inspect and manipulate state
+ // directly. Not part of the production interface.
Db& internal_db() {
return db;
}
- QuiesceClusterMembership& internal_membership() {
- return membership;
- }
decltype(pending_requests)& internal_pending_requests() {
return pending_requests;
}
decltype(peers)& internal_peers() {
return peers;
}
+ // Test-only helper: artificially advance this manager's cluster epoch,
+ // simulating a membership change that reaches this node ahead of others.
+ // Holds submit_mutex so the bump is serialized with the submit thread;
+ // notify_all() presumably wakes that thread so it re-evaluates state
+ // against the new epoch (waiters cannot observe the increment until the
+ // mutex is released, so notifying before the ++ is safe).
+ // NOTE(review): cluster_membership is dereferenced unconditionally —
+ // assumes membership is always set when tests call this; confirm.
+ epoch_t bump_epoch() {
+ std::lock_guard l(submit_mutex);
+ submit_condition.notify_all();
+ return ++cluster_membership->epoch;
+ }
};
epoch_t epoch = 0;
ASSERT_EQ(2, last_request->response.sets.size());
EXPECT_EQ(QS_QUIESCING, last_request->response.sets.at("set1").rstate.state);
EXPECT_EQ(QS_QUIESCING, last_request->response.sets.at("set2").rstate.state);
+}
+
+/* ========================================= */
+// Verifies that a peer ack carrying a future epoch (the peer bumped its
+// epoch before the leader did) is not mistaken for an unknown-version ack:
+// the leader should hold off until its own membership catches up, and the
+// set should still reach QUIESCED once the leader's epoch is bumped too.
+TEST_F(QuiesceDbTest, AckDuringEpochMismatch)
+{
+ ASSERT_NO_FATAL_FAILURE(configure_cluster({ mds_gid_t(1), mds_gid_t(2) }));
+ managers.at(mds_gid_t(1))->reset_agent_callback(QUIESCING_AGENT_CB);
+
+ ASSERT_EQ(OK(), run_request([](auto& r) {
+ r.set_id = "set1";
+ r.timeout = sec(60);
+ r.expiration = sec(60);
+ r.include_roots({ "root1" });
+ }));
+
+ // we are quiescing because manager 2 hasn't yet acknowledged the new state
+ EXPECT_EQ(QS_QUIESCING, last_request->response.sets.at("set1").rstate.state);
+
+ // imagine that a new epoch has started on the peer before it did for the leader
+ managers.at(mds_gid_t(2))->bump_epoch();
+
+ // do the acking while our epoch is higher
+ {
+ // wait for the agent to ack root1 as quiesced
+ auto did_ack = add_ack_hook([](auto gid, auto const& ack) {
+ return gid == mds_gid_t(2) && ack.roots.contains("file:/root1") && ack.roots.at("file:/root1").state == QS_QUIESCED;
+ });
+
+ // allow acks
+ managers.at(mds_gid_t(2))->reset_agent_callback(QUIESCING_AGENT_CB);
+
+ // the ack should arrive promptly even though the epochs disagree
+ EXPECT_EQ(std::future_status::ready, did_ack.wait_for(std::chrono::milliseconds(100)));
+ }
+
+ // now, bump the epoch on the leader and make sure it quiesces the set
+ managers.at(mds_gid_t(1))->bump_epoch();
+ EXPECT_EQ(OK(), run_request([](auto& r) {
+ r.set_id = "set1";
+ r.await = sec(10);
+ }));
}
\ No newline at end of file