From: zhangjianwei2
Date: Tue, 14 Nov 2023 08:45:43 +0000 (+0800)
Subject: osd: distinguish between osd_pg_stat_report_interval_max_(epochs|seconds)
X-Git-Tag: testing/wip-batrick-testing-20240411.154038~585^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=629cb3232d1e0dc2172d24602286bcfec9ef0323;p=ceph-ci.git

osd: distinguish between osd_pg_stat_report_interval_max_(epochs|seconds)

osd_pg_stat_report_interval_max was previously used as either a max time
in seconds or a max number of epochs. Instead, separate it into two
configs and adjust PeeringState::prepare_stats_for_publish to check both.

Additionally, this commit removes a superfluous check in
PeeringState::Active::react(const AdvMap&) and calls publish_stats_to_osd
unconditionally, as the other callers in PeeringState do.

Fixes: https://tracker.ceph.com/issues/63520
Signed-off-by: zhangjianwei2
---

diff --git a/qa/standalone/scrub/osd-scrub-snaps.sh b/qa/standalone/scrub/osd-scrub-snaps.sh
index c543b48a19c..40bd6a26e74 100755
--- a/qa/standalone/scrub/osd-scrub-snaps.sh
+++ b/qa/standalone/scrub/osd-scrub-snaps.sh
@@ -209,14 +209,16 @@ function TEST_scrub_snaps() {
     done
     ceph tell osd.* config set osd_shallow_scrub_chunk_max 25
     ceph tell osd.* config set osd_shallow_scrub_chunk_min 5
-    ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_seconds 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_epochs 1
     wait_for_clean || return 1

     ceph tell osd.* config get osd_shallow_scrub_chunk_max
     ceph tell osd.* config get osd_shallow_scrub_chunk_min
-    ceph tell osd.* config get osd_pg_stat_report_interval_max
+    ceph tell osd.* config get osd_pg_stat_report_interval_max_seconds
+    ceph tell osd.* config get osd_pg_stat_report_interval_max_epochs
     ceph tell osd.* config get osd_scrub_chunk_max
     ceph tell osd.* config get osd_scrub_chunk_min

@@ -772,7 +774,8 @@ function _scrub_snaps_multi() {
     ceph tell osd.* config set osd_shallow_scrub_chunk_max 3
     ceph tell osd.* config set osd_shallow_scrub_chunk_min 3
     ceph tell osd.* config set osd_scrub_chunk_min 3
-    ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_seconds 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_epochs 1
     wait_for_clean || return 1

     local pgid="${poolid}.0"
@@ -1163,7 +1166,7 @@ fi
 function TEST_scrub_snaps_replica() {
     local dir=$1
     ORIG_ARGS=$CEPH_ARGS
-    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max_seconds=1 --osd_pg_stat_report_interval_max_epochs=1"
     _scrub_snaps_multi $dir replica
     err=$?
     CEPH_ARGS=$ORIG_ARGS
@@ -1173,7 +1176,7 @@ function TEST_scrub_snaps_replica() {
 function TEST_scrub_snaps_primary() {
     local dir=$1
     ORIG_ARGS=$CEPH_ARGS
-    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max_seconds=1 --osd_pg_stat_report_interval_max_epochs=1"
     _scrub_snaps_multi $dir primary
     err=$?
     CEPH_ARGS=$ORIG_ARGS

diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh
index 6816d71de32..3b0590fdfde 100644
--- a/qa/standalone/scrub/scrub-helpers.sh
+++ b/qa/standalone/scrub/scrub-helpers.sh
@@ -239,7 +239,8 @@ function standard_scrub_cluster() {
             --osd_scrub_interval_randomize_ratio=0 \
             --osd_scrub_backoff_ratio=0.0 \
             --osd_pool_default_pg_autoscale_mode=off \
-            --osd_pg_stat_report_interval_max=1 \
+            --osd_pg_stat_report_interval_max_seconds=1 \
+            --osd_pg_stat_report_interval_max_epochs=1 \
             $extra_pars"

     for osd in $(seq 0 $(expr $OSDS - 1))

diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in
index 48c6788a88b..087ab80cb4d 100644
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -2903,9 +2903,19 @@ options:
   default: 5_min
   with_legacy: true
 # report pg stats for any given pg at least this often
-- name: osd_pg_stat_report_interval_max
+- name: osd_pg_stat_report_interval_max_seconds
   type: int
   level: advanced
+  desc: The maximum interval, in seconds, after which a PG's reported_epoch is
+    updated; this helps the monitor trim osdmaps when the osdmap rarely changes.
+  default: 500
+  with_legacy: true
+- name: osd_pg_stat_report_interval_max_epochs
+  type: int
+  level: advanced
+  desc: The maximum number of epochs by which a PG's reported_epoch may lag
+    behind the current osdmap epoch; past that, the reported_epoch must be
+    updated, which helps the monitor trim osdmaps when the osdmap changes often.
   default: 500
   with_legacy: true
 # Max number of snap intervals to report to mgr in pg_stat_t

diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc
index 2c41b7b7118..45b593f252c 100644
--- a/src/osd/PeeringState.cc
+++ b/src/osd/PeeringState.cc
@@ -3890,8 +3890,6 @@ std::optional<pg_stat_t> PeeringState::prepare_stats_for_publish(

   pg_stat_t pre_publish = info.stats;
   pre_publish.stats.add(unstable_stats);
-  utime_t cutoff = now;
-  cutoff -= cct->_conf->osd_pg_stat_report_interval_max;

   // share (some of) our purged_snaps via the pg_stats. limit # of intervals
   // because we don't want to make the pg_stat_t structures too expensive.
@@ -3906,8 +3904,22 @@ std::optional<pg_stat_t> PeeringState::prepare_stats_for_publish(
   psdout(20) << "reporting purged_snaps "
             << pre_publish.purged_snaps << dendl;

+  // when the osdmap does not change, update info.stats.reported_epoch
+  // once osd_pg_stat_report_interval_max_seconds have elapsed.
+  utime_t cutoff_time = now;
+  cutoff_time -= cct->_conf->osd_pg_stat_report_interval_max_seconds;
+  bool is_time_expired = cutoff_time > info.stats.last_fresh;
+
+  // 500 epochs is also the minimum number of osdmaps the mon must retain.
+  // if info.stats.reported_epoch lags behind the current osdmap epoch by
+  // more than osd_pg_stat_report_interval_max_epochs, this pg's stats are
+  // considered too old and must be republished so the mon can trim osdmaps.
+  epoch_t cutoff_epoch = info.stats.reported_epoch;
+  cutoff_epoch += cct->_conf->osd_pg_stat_report_interval_max_epochs;
+  bool is_epoch_behind = cutoff_epoch < get_osdmap_epoch();
+
   if (pg_stats_publish && pre_publish == *pg_stats_publish &&
-      info.stats.last_fresh > cutoff) {
+      (!is_epoch_behind && !is_time_expired)) {
     psdout(15) << "publish_stats_to_osd " << pg_stats_publish->reported_epoch
               << ": no change since " << info.stats.last_fresh << dendl;
     return std::nullopt;
@@ -5978,14 +5990,12 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
     return forward_event();
   }
   psdout(10) << "Active advmap" << dendl;
-  bool need_publish = false;

   pl->on_active_advmap(advmap.osdmap);
   if (ps->dirty_big_info) {
     // share updated purged_snaps to mgr/mon so that we (a) stop reporting
     // purged snaps and (b) perhaps share more snaps that we have purged
     // but didn't fit in pg_stat_t.
-    need_publish = true;
     ps->share_pg_info();
   }

@@ -6027,18 +6037,9 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
       ps->state_set(PG_STATE_UNDERSIZED);
     }
     // degraded changes will be detected by call from publish_stats_to_osd()
-    need_publish = true;
-  }
-
-  // if we haven't reported our PG stats in a long time, do so now.
-  if (ps->info.stats.reported_epoch + ps->cct->_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) {
-    psdout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - ps->info.stats.reported_epoch)
-               << " epochs" << dendl;
-    need_publish = true;
   }

-  if (need_publish)
-    pl->publish_stats_to_osd();
+  pl->publish_stats_to_osd();

   if (ps->check_prior_readable_down_osds(advmap.osdmap)) {
     pl->recheck_readable();
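
The publish-or-skip decision introduced by this patch can be read in
isolation. The standalone C++ sketch below models just the two cutoffs;
it is illustrative, not Ceph code: ReportPolicy, must_publish, and the
literal timestamp/epoch values are invented for the example, and the real
check in prepare_stats_for_publish only skips publishing when, in
addition, the pending stats equal the last published stats
(pre_publish == *pg_stats_publish).

// Standalone sketch (not Ceph code): models the two cutoffs that
// prepare_stats_for_publish now checks before skipping a publish.
#include <cstdint>
#include <iostream>

using epoch_t = uint32_t;

struct ReportPolicy {
  int64_t max_seconds;  // models osd_pg_stat_report_interval_max_seconds
  int64_t max_epochs;   // models osd_pg_stat_report_interval_max_epochs
};

// Returns true if the pg must (re)publish its stats: either the stats were
// last refreshed too long ago (time cutoff), or reported_epoch lags too far
// behind the current osdmap epoch (epoch cutoff).
bool must_publish(const ReportPolicy& policy,
                  int64_t now_sec, int64_t last_fresh_sec,
                  epoch_t reported_epoch, epoch_t osdmap_epoch)
{
  bool is_time_expired = (now_sec - policy.max_seconds) > last_fresh_sec;
  bool is_epoch_behind = (reported_epoch + policy.max_epochs) < osdmap_epoch;
  return is_time_expired || is_epoch_behind;
}

int main() {
  ReportPolicy policy{500, 500};  // both options default to 500

  // Quiet cluster: the osdmap barely moves, but 600s have passed since the
  // last refresh -> time cutoff forces a publish so the mon can trim maps.
  std::cout << must_publish(policy, 1000, 400, 100, 101) << '\n';  // 1

  // Busy cluster: stats were refreshed recently, but reported_epoch lags
  // the osdmap by 501 epochs -> epoch cutoff forces a publish.
  std::cout << must_publish(policy, 1000, 900, 100, 601) << '\n';  // 1

  // Neither cutoff hit -> the OSD may skip publishing (in the real code,
  // only if the stats are also unchanged since the last publish).
  std::cout << must_publish(policy, 1000, 900, 100, 200) << '\n';  // 0
}

Either cutoff alone forces a publish, which is what keeps reported_epoch
moving, and hence lets the mon trim osdmaps, in both quiet and busy
clusters.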