From 8a97339963680e46ea6a411323a69f733dfbdeef Mon Sep 17 00:00:00 2001
From: Venky Shankar
Date: Fri, 1 Oct 2021 04:55:40 -0400
Subject: [PATCH] mds: skip journaling blocklisted clients when in `replay` state

When a standby MDS is transitioning to active, it passes through
`replay` state. When the MDS is in this state, there are no journal
segments available for recording journal updates. If the MDS receives
an OSDMap update in this state, journaling blocklisted clients causes
a crash since no journal segments are available. This is a bit hard
to reproduce as it requires correct timing of an OSDMap update along
with various other factors.

Note that, when the MDS reaches `reconnect` state, it will journal
the blocklisted clients anyway.

This partially fixes tracker: https://tracker.ceph.com/issues/51589
which mentions a similar crash but in `reconnect` state. However,
that crash was seen in nautilus.

A couple of minor changes include removing hardcoded function names
and carving out reusable parts into a separate function.

Partially-fixes: https://tracker.ceph.com/issues/51589
Signed-off-by: Venky Shankar
(cherry picked from commit 6d6236dc8d15636af8060057e6e69c26c473f987)
---
 src/mds/MDSRank.cc | 38 ++++++++++++++++++++++----------------
 src/mds/MDSRank.h  |  4 ++++
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 434294b0e0aeb..3d5ed6e2c59b1 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -1926,6 +1926,17 @@ void MDSRank::resolve_done()
   snapclient->sync(new C_MDSInternalNoop);
 }
 
+void MDSRank::apply_blocklist(const std::set<entity_addr_t> &addrs, epoch_t epoch) {
+  auto victims = server->apply_blocklist(addrs);
+  dout(4) << __func__ << ": killed " << victims << " blocklisted sessions ("
+          << addrs.size() << " blocklist entries, "
+          << sessionmap.get_sessions().size() << ")" << dendl;
+  if (victims) {
+    set_osd_epoch_barrier(epoch);
+  }
+}
+
+
 void MDSRank::reconnect_start()
 {
   dout(1) << "reconnect_start" << dendl;
@@ -1943,13 +1954,8 @@ void MDSRank::reconnect_start()
       o.get_blocklist(&blocklist);
       epoch = o.get_epoch();
   });
-  auto killed = server->apply_blocklist(blocklist);
-  dout(4) << "reconnect_start: killed " << killed << " blocklisted sessions ("
-          << blocklist.size() << " blocklist entries, "
-          << sessionmap.get_sessions().size() << ")" << dendl;
-  if (killed) {
-    set_osd_epoch_barrier(epoch);
-  }
+
+  apply_blocklist(blocklist, epoch);
 
   server->reconnect_clients(new C_MDS_VoidFn(this, &MDSRank::reconnect_done));
   finish_contexts(g_ceph_context, waiting_for_reconnect);
@@ -3444,17 +3450,17 @@ void MDSRankDispatcher::handle_osd_map()
 
   purge_queue.update_op_limit(*mdsmap);
 
-  std::set<entity_addr_t> newly_blocklisted;
-  objecter->consume_blocklist_events(&newly_blocklisted);
-  auto epoch = objecter->with_osdmap([](const OSDMap &o){return o.get_epoch();});
-  dout(4) << "handle_osd_map epoch " << epoch << ", "
-          << newly_blocklisted.size() << " new blocklist entries" << dendl;
-  auto victims = server->apply_blocklist(newly_blocklisted);
-  if (victims) {
-    set_osd_epoch_barrier(epoch);
+  // it's ok if replay state is reached via standby-replay, the
+  // reconnect state will journal blocklisted clients (journal
+  // is opened for writing in `replay_done` before moving to
+  // up:resolve).
+  if (!is_replay()) {
+    std::set<entity_addr_t> newly_blocklisted;
+    objecter->consume_blocklist_events(&newly_blocklisted);
+    auto epoch = objecter->with_osdmap([](const OSDMap &o){return o.get_epoch();});
+    apply_blocklist(newly_blocklisted, epoch);
   }
 
-
   // By default the objecter only requests OSDMap updates on use,
   // we would like to always receive the latest maps in order to
   // apply policy based on the FULL flag.
diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h
index 2a4b9928158a8..2e3a9ee63520d 100644
--- a/src/mds/MDSRank.h
+++ b/src/mds/MDSRank.h
@@ -549,6 +549,10 @@ class MDSRank {
 
     Context *create_async_exec_context(C_ExecAndReply *ctx);
 
+    // blocklist the provided addrs and set OSD epoch barrier
+    // with the provided epoch.
+    void apply_blocklist(const std::set<entity_addr_t> &addrs, epoch_t epoch);
+
     // Incarnation as seen in MDSMap at the point where a rank is
     // assigned.
     int incarnation = 0;
-- 
2.39.5