From 888aee6226f5a87bc7d5e12038875a39194375bb Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Fri, 1 Oct 2021 04:55:40 -0400 Subject: [PATCH] mds: skip journaling blocklisted clients when in `replay` state When a standby MDS is transitioning to active, it passes through the `replay` state. When the MDS is in this state, there are no journal segments available for recording journal updates. If the MDS receives an OSDMap update in this state, journaling blocklisted clients causes a crash since no journal segments are available. This is a bit hard to reproduce as it requires correct timing of an OSDMap update along with various other factors. Note that when the MDS reaches the `reconnect` state, it will journal the blocklisted clients anyway. This partially fixes tracker issue https://tracker.ceph.com/issues/51589, which mentions a similar crash but in the `reconnect` state. However, that crash was seen in nautilus. The patch also includes a couple of minor changes: removing hardcoded function names from log messages and carving out reusable parts into a separate function. 
Partially-fixes: https://tracker.ceph.com/issues/51589 Signed-off-by: Venky Shankar (cherry picked from commit 6d6236dc8d15636af8060057e6e69c26c473f987) Conflicts: src/mds/MDSRank.cc Trivial conflict: s/blocklist/blacklist/ --- src/mds/MDSRank.cc | 38 ++++++++++++++++++++++---------------- src/mds/MDSRank.h | 4 ++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index bf259b35e9b..c879b23c484 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1870,6 +1870,17 @@ void MDSRank::resolve_done() snapclient->sync(new C_MDSInternalNoop); } +void MDSRank::apply_blacklist(const std::set<entity_addr_t> &addrs, epoch_t epoch) { + auto victims = server->apply_blacklist(addrs); + dout(4) << __func__ << ": killed " << victims << " blacklisted sessions (" + << addrs.size() << " blacklist entries, " + << sessionmap.get_sessions().size() << ")" << dendl; + if (victims) { + set_osd_epoch_barrier(epoch); + } +} + + void MDSRank::reconnect_start() { dout(1) << "reconnect_start" << dendl; @@ -1887,13 +1898,8 @@ void MDSRank::reconnect_start() o.get_blacklist(&blacklist); epoch = o.get_epoch(); }); - auto killed = server->apply_blacklist(blacklist); - dout(4) << "reconnect_start: killed " << killed << " blacklisted sessions (" - << blacklist.size() << " blacklist entries, " - << sessionmap.get_sessions().size() << ")" << dendl; - if (killed) { - set_osd_epoch_barrier(epoch); - } + + apply_blacklist(blacklist, epoch); server->reconnect_clients(new C_MDS_VoidFn(this, &MDSRank::reconnect_done)); finish_contexts(g_ceph_context, waiting_for_reconnect); @@ -3317,17 +3323,17 @@ void MDSRankDispatcher::handle_osd_map() purge_queue.update_op_limit(*mdsmap); - std::set<entity_addr_t> newly_blacklisted; - objecter->consume_blacklist_events(&newly_blacklisted); - auto epoch = objecter->with_osdmap([](const OSDMap &o){return o.get_epoch();}); - dout(4) << "handle_osd_map epoch " << epoch << ", " - << newly_blacklisted.size() << " new blacklist entries" << dendl; - 
auto victims = server->apply_blacklist(newly_blacklisted); - if (victims) { - set_osd_epoch_barrier(epoch); + // it's ok if replay state is reached via standby-replay, the + // reconnect state will journal blacklisted clients (journal + // is opened for writing in `replay_done` before moving to + // up:resolve). + if (!is_replay()) { + std::set<entity_addr_t> newly_blacklisted; + objecter->consume_blacklist_events(&newly_blacklisted); + auto epoch = objecter->with_osdmap([](const OSDMap &o){return o.get_epoch();}); + apply_blacklist(newly_blacklisted, epoch); } - // By default the objecter only requests OSDMap updates on use, // we would like to always receive the latest maps in order to // apply policy based on the FULL flag. diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 95741ee36ab..dc3b9e5a216 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -519,6 +519,10 @@ class MDSRank { Context *create_async_exec_context(C_ExecAndReply *ctx); + // blacklist the provided addrs and set OSD epoch barrier + // with the provided epoch. + void apply_blacklist(const std::set<entity_addr_t> &addrs, epoch_t epoch); + // Incarnation as seen in MDSMap at the point where a rank is // assigned. int incarnation = 0; -- 2.47.3