From: Matan Breizman Date: Wed, 2 Nov 2022 10:40:03 +0000 (+0000) Subject: osd: Fix check_past_interval_bounds() X-Git-Tag: v16.2.14~69^2~7 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=74e2bcccd5e2be04ba0387f555ce7c2fe32f34eb;p=ceph.git osd: Fix check_past_interval_bounds() When getting the required past interval bounds we use oldest_map or current pg info (lec/ec). Before this change we set oldest_map epoch using the osd's superblock.oldest_map. The fix will use the max_oldest_map received from other peers instead since a specific osd's oldest_map can lag for a while in order to avoid large workloads. Fixes: https://tracker.ceph.com/issues/49689 Signed-off-by: Matan Breizman (cherry picked from commit 0c611b362fb9cc4225f18283f74299551c2c5953) --- diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 34676ee7a109..9208cc2d4764 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -349,6 +349,11 @@ public: return 0; } + epoch_t max_oldest_stored_osdmap() final { + // TODO + return 0; + } + void on_backfill_reserved() final { recovery_handler->on_backfill_reserved(); } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 5b10f146616f..fcd63807274d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1576,6 +1576,10 @@ epoch_t PG::oldest_stored_osdmap() { return osd->get_superblock().oldest_map; } +epoch_t PG::max_oldest_stored_osdmap() { + return osd->get_superblock().max_oldest_map; +} + OstreamTemp PG::get_clog_info() { return osd->clog->info(); } diff --git a/src/osd/PG.h b/src/osd/PG.h index 61adae1205eb..408e8114ef51 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -519,6 +519,7 @@ public: void clear_primary_state() override; epoch_t oldest_stored_osdmap() override; + epoch_t max_oldest_stored_osdmap() override; OstreamTemp get_clog_error() override; OstreamTemp get_clog_info() override; OstreamTemp get_clog_debug() override; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 
9709f3ce1444..5fa76a7a7b23 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -953,7 +953,9 @@ static pair get_required_past_interval_bounds( void PeeringState::check_past_interval_bounds() const { - auto oldest_epoch = pl->oldest_stored_osdmap(); + // a specific OSD's oldest_map can lag for a while, therefore + // use the maximum MOSDMap.oldest_map received with peers. + auto oldest_epoch = pl->max_oldest_stored_osdmap(); auto rpib = get_required_past_interval_bounds( info, oldest_epoch); diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 2cc340cb9df9..d3ac117b9616 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -391,6 +391,7 @@ public: virtual void on_active_actmap() = 0; virtual void on_active_advmap(const OSDMapRef &osdmap) = 0; virtual epoch_t oldest_stored_osdmap() = 0; + virtual epoch_t max_oldest_stored_osdmap() = 0; // ============ recovery reservation notifications ========== virtual void on_backfill_reserved() = 0;