osd: distinguish between osd_pg_stat_report_interval_max_(seconds|epochs)
author     zhangjianwei2 <zhangjianwei2_yewu@cmss.chinamobile.com>
           Tue, 14 Nov 2023 08:45:43 +0000 (16:45 +0800)
committer  zhangjianwei2 <zhangjianwei2_yewu@cmss.chinamobile.com>
           Thu, 23 Nov 2023 01:29:09 +0000 (09:29 +0800)
osd_pg_stat_report_interval_max was previously interpreted as a maximum
time in seconds in PeeringState::prepare_stats_for_publish and as a maximum
number of epochs in PeeringState::Active::react(const AdvMap&).  Separate it
into osd_pg_stat_report_interval_max_seconds and
osd_pg_stat_report_interval_max_epochs, and adjust
PeeringState::prepare_stats_for_publish to check both.

Additionally, this commit removes the now-redundant epoch check from
PeeringState::Active::react(const AdvMap&) and calls publish_stats_to_osd
unconditionally, as other callers in PeeringState do; the epoch check lives
in prepare_stats_for_publish now, which already suppresses publication when
nothing has changed.

Fixes: https://tracker.ceph.com/issues/63520
Signed-off-by: zhangjianwei2 <zhangjianwei2@cmss.chinamobile.com>
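
For illustration, the behavior described above can be distilled into a few
lines: publish when the stats changed, when the last report is older than the
seconds limit, or when reported_epoch lags the osdmap by more than the epochs
limit. A minimal standalone C++ sketch with simplified parameter names, not
the actual PeeringState code (which works on info.stats and utime_t):

    #include <cstdint>
    #include <iostream>

    using epoch_t = uint32_t;

    // Distilled form of the new publish gate: report if the stats changed,
    // if the last report is older than max_seconds, or if reported_epoch
    // lags the current osdmap epoch by more than max_epochs.
    bool should_publish(bool stats_changed,
                        double now_sec, double last_fresh_sec, int max_seconds,
                        epoch_t reported_epoch, epoch_t osdmap_epoch,
                        int max_epochs)
    {
      bool is_time_expired = (now_sec - max_seconds) > last_fresh_sec;
      bool is_epoch_behind = reported_epoch + max_epochs < osdmap_epoch;
      return stats_changed || is_time_expired || is_epoch_behind;
    }

    int main()
    {
      // unchanged stats, recent report, small epoch lag -> suppressed
      std::cout << should_publish(false, 1000, 900, 500, 4000, 4100, 500) << '\n'; // 0
      // unchanged stats, but the osdmap has advanced 501 epochs -> publish
      std::cout << should_publish(false, 1000, 900, 500, 4000, 4501, 500) << '\n'; // 1
      // unchanged stats, but the last report is 600 s old -> publish
      std::cout << should_publish(false, 1000, 400, 500, 4000, 4100, 500) << '\n'; // 1
    }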
qa/standalone/scrub/osd-scrub-snaps.sh
qa/standalone/scrub/scrub-helpers.sh
src/common/options/global.yaml.in
src/osd/PeeringState.cc

qa/standalone/scrub/osd-scrub-snaps.sh
index c543b48a19ccc3b984d7c0d19994ce4ec00a5b64..40bd6a26e74d485b6cdd88c6932926397bfa00ec 100755 (executable)
@@ -209,14 +209,16 @@ function TEST_scrub_snaps() {
     done
     ceph tell osd.* config set osd_shallow_scrub_chunk_max 25
     ceph tell osd.* config set osd_shallow_scrub_chunk_min 5
-    ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_seconds 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_epochs 1
 
 
     wait_for_clean || return 1
 
     ceph tell osd.* config get osd_shallow_scrub_chunk_max
     ceph tell osd.* config get osd_shallow_scrub_chunk_min
-    ceph tell osd.* config get osd_pg_stat_report_interval_max
+    ceph tell osd.* config get osd_pg_stat_report_interval_max_seconds
+    ceph tell osd.* config get osd_pg_stat_report_interval_max_epochs
     ceph tell osd.* config get osd_scrub_chunk_max
     ceph tell osd.* config get osd_scrub_chunk_min
 
@@ -772,7 +774,8 @@ function _scrub_snaps_multi() {
     ceph tell osd.* config set osd_shallow_scrub_chunk_max 3
     ceph tell osd.* config set osd_shallow_scrub_chunk_min 3
     ceph tell osd.* config set osd_scrub_chunk_min 3
-    ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_seconds 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_epochs 1
     wait_for_clean || return 1
 
     local pgid="${poolid}.0"
@@ -1163,7 +1166,7 @@ fi
 function TEST_scrub_snaps_replica() {
     local dir=$1
     ORIG_ARGS=$CEPH_ARGS
-    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max_seconds=1 --osd_pg_stat_report_interval_max_epochs=1"
     _scrub_snaps_multi $dir replica
     err=$?
     CEPH_ARGS=$ORIG_ARGS
@@ -1173,7 +1176,7 @@ function TEST_scrub_snaps_replica() {
 function TEST_scrub_snaps_primary() {
     local dir=$1
     ORIG_ARGS=$CEPH_ARGS
-    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max_seconds=1 --osd_pg_stat_report_interval_max_epochs=1"
     _scrub_snaps_multi $dir primary
     err=$?
     CEPH_ARGS=$ORIG_ARGS
qa/standalone/scrub/scrub-helpers.sh
index 6816d71de32cf56982142c7fc2ba9422dcff54e0..3b0590fdfdef47d82e106c77b08d1bf71f5b0bc9 100644 (file)
@@ -239,7 +239,8 @@ function standard_scrub_cluster() {
             --osd_scrub_interval_randomize_ratio=0 \
             --osd_scrub_backoff_ratio=0.0 \
             --osd_pool_default_pg_autoscale_mode=off \
-            --osd_pg_stat_report_interval_max=1 \
+            --osd_pg_stat_report_interval_max_seconds=1 \
+            --osd_pg_stat_report_interval_max_epochs=1 \
             $extra_pars"
 
     for osd in $(seq 0 $(expr $OSDS - 1))
src/common/options/global.yaml.in
index 48c6788a88b327f333b1c4699bce8929cbab6b49..087ab80cb4d6579973b58de8de0bad0a3ceb478d 100644 (file)
@@ -2903,9 +2903,19 @@ options:
   default: 5_min
   with_legacy: true
 # report pg stats for any given pg at least this often
-- name: osd_pg_stat_report_interval_max
+- name: osd_pg_stat_report_interval_max_seconds
   type: int
   level: advanced
+  desc: Maximum interval, in seconds, between updates of a PG's reported_epoch;
+        helps the mon trim osdmaps when the osdmap changes infrequently.
   default: 500
   with_legacy: true
+- name: osd_pg_stat_report_interval_max_epochs
+  type: int
+  level: advanced
+  desc: Maximum number of epochs a PG's reported_epoch may lag behind the
+        current osdmap epoch; beyond this the stats must be republished,
+        helping the mon trim osdmaps when the osdmap changes frequently.
+  default: 500
+  with_legacy: true
 # Max number of snap intervals to report to mgr in pg_stat_t
src/osd/PeeringState.cc
index 2c41b7b7118961976d0751499f55b9ce82dd1a62..45b593f252c31e9b5fc81c1937c761a604f9aabf 100644 (file)
@@ -3890,8 +3890,6 @@ std::optional<pg_stat_t> PeeringState::prepare_stats_for_publish(
 
   pg_stat_t pre_publish = info.stats;
   pre_publish.stats.add(unstable_stats);
-  utime_t cutoff = now;
-  cutoff -= cct->_conf->osd_pg_stat_report_interval_max;
 
   // share (some of) our purged_snaps via the pg_stats. limit # of intervals
   // because we don't want to make the pg_stat_t structures too expensive.
@@ -3906,8 +3904,22 @@ std::optional<pg_stat_t> PeeringState::prepare_stats_for_publish(
   psdout(20) << "reporting purged_snaps "
             << pre_publish.purged_snaps << dendl;
 
+  // when the osdmap is not changing, still refresh info.stats.reported_epoch
+  // once osd_pg_stat_report_interval_max_seconds have elapsed.
+  utime_t cutoff_time = now;
+  cutoff_time -= cct->_conf->osd_pg_stat_report_interval_max_seconds;
+  bool is_time_expired = cutoff_time > info.stats.last_fresh;
+
+  // the default of 500 matches mon_min_osdmap_epochs, the minimum number of
+  // osdmaps the mon must retain. if info.stats.reported_epoch lags the
+  // current osdmap epoch by more than the configured maximum, the last
+  // reported stats are too old and must be refreshed so the mon can trim.
+  epoch_t cutoff_epoch = info.stats.reported_epoch;
+  cutoff_epoch += cct->_conf->osd_pg_stat_report_interval_max_epochs;
+  bool is_epoch_behind = cutoff_epoch < get_osdmap_epoch();
+
   if (pg_stats_publish && pre_publish == *pg_stats_publish &&
-      info.stats.last_fresh > cutoff) {
+      (!is_epoch_behind && !is_time_expired)) {
     psdout(15) << "publish_stats_to_osd " << pg_stats_publish->reported_epoch
               << ": no change since " << info.stats.last_fresh << dendl;
     return std::nullopt;
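
To make the two cutoffs concrete, here is a small worked example with made-up
numbers, mirroring the cutoff_time/cutoff_epoch names in the hunk above
(utime_t is approximated with plain seconds; this is not Ceph code):

    #include <cassert>
    #include <cstdint>

    int main()
    {
      using epoch_t = uint32_t;

      // time cutoff: last report at t=400s, now t=1000s, limit 500s
      double now = 1000.0, last_fresh = 400.0;
      double cutoff_time = now - 500;                     // 500.0
      bool is_time_expired = cutoff_time > last_fresh;    // 500 > 400 -> true
      assert(is_time_expired);

      // epoch cutoff: reported at epoch 4000, osdmap at 4100, max lag 500
      epoch_t reported_epoch = 4000, osdmap_epoch = 4100;
      epoch_t cutoff_epoch = reported_epoch + 500;        // 4500
      bool is_epoch_behind = cutoff_epoch < osdmap_epoch; // 4500 < 4100 -> false
      assert(!is_epoch_behind);

      // either flag alone forces a publish of otherwise unchanged stats,
      // so this pg would still report (the time cutoff fired).
      assert(is_time_expired || is_epoch_behind);
      return 0;
    }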
@@ -5978,14 +5990,12 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
     return forward_event();
   }
   psdout(10) << "Active advmap" << dendl;
-  bool need_publish = false;
 
   pl->on_active_advmap(advmap.osdmap);
   if (ps->dirty_big_info) {
     // share updated purged_snaps to mgr/mon so that we (a) stop reporting
     // purged snaps and (b) perhaps share more snaps that we have purged
     // but didn't fit in pg_stat_t.
-    need_publish = true;
     ps->share_pg_info();
   }
 
@@ -6027,18 +6037,9 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
       ps->state_set(PG_STATE_UNDERSIZED);
     }
     // degraded changes will be detected by call from publish_stats_to_osd()
-    need_publish = true;
-  }
-
-  // if we haven't reported our PG stats in a long time, do so now.
-  if (ps->info.stats.reported_epoch + ps->cct->_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) {
-    psdout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - ps->info.stats.reported_epoch)
-                      << " epochs" << dendl;
-    need_publish = true;
   }
 
-  if (need_publish)
-    pl->publish_stats_to_osd();
+  pl->publish_stats_to_osd();
 
   if (ps->check_prior_readable_down_osds(advmap.osdmap)) {
     pl->recheck_readable();
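
The unconditional call above is cheap because publish_stats_to_osd funnels
through prepare_stats_for_publish, which returns std::nullopt when nothing has
changed and neither cutoff fired, making redundant calls no-ops. A simplified
sketch of that caller pattern, using hypothetical stand-in types rather than
the real OSD plumbing:

    #include <iostream>
    #include <optional>
    #include <string>

    // stand-in for pg_stat_t
    struct PgStats { std::string summary; };

    // stand-in for PeeringState::prepare_stats_for_publish(): yields a value
    // only when the stats differ or one of the two report cutoffs has fired
    std::optional<PgStats> prepare_stats_for_publish(bool changed, bool cutoff_fired)
    {
      if (!changed && !cutoff_fired)
        return std::nullopt;   // nothing to report; caller does no work
      return PgStats{"updated pg stats"};
    }

    // stand-in for publish_stats_to_osd(): safe to call unconditionally
    void publish_stats_to_osd(bool changed, bool cutoff_fired)
    {
      if (auto stats = prepare_stats_for_publish(changed, cutoff_fired))
        std::cout << "queueing " << stats->summary << " for the mgr/mon\n";
      else
        std::cout << "no change, nothing queued\n";
    }

    int main()
    {
      publish_stats_to_osd(false, false); // no change, nothing queued
      publish_stats_to_osd(false, true);  // cutoff fired: stats go out anyway
    }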