From 76e5f5caad61cfe63924eb79f7df1b35f8c8afc1 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Mon, 12 Jul 2021 14:26:48 +0000 Subject: [PATCH] crimson/osd: prevent premature OSD activation. In contrast to the classical OSD: ``` int OSD::init() { // ... { epoch_t bind_epoch = osdmap->get_epoch(); service.set_epochs(NULL, NULL, &bind_epoch); } // ... // load up pgs (as they previously existed) load_pgs(); ``` crimson doesn't set the `bind_epoch` when initializing. The net result is going active prematurely which happens because the 3rd condition (`bind_epoch < osdmap->get_up_from(whoami)`) is always true. ``` if (osdmap->is_up(whoami) && osdmap->get_addrs(whoami) == public_msgr->get_myaddrs() && bind_epoch < osdmap->get_up_from(whoami)) { if (state.is_booting()) { logger().info("osd.{}: activating...", whoami); ``` Nullifying it translates the "is it activated?" check basically into "is it up?" verification. This is problematic in a situation like: 1. Primary got new OSDMap but replica has not. 2. Replica restarts, sends `MOSDBoot` and receives the newer map from the previous point. 3. Primary sends a message that is unexpected by replica. 4. Monitor publishes a new OSDMap driven by the `MOSDBoot`. Signed-off-by: Radoslaw Zarzynski --- src/crimson/osd/osd.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 539e3a65b6737..6697d1bd9f428 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -264,6 +264,7 @@ seastar::future<> OSD::start() shard_services.update_map(map); osdmap_gate.got_map(map->get_epoch()); osdmap = std::move(map); + bind_epoch = osdmap->get_epoch(); return load_pgs(); }).then([this] { -- 2.39.5