git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: collect total snap-trim queue length
author Ronen Friedman <rfriedma@redhat.com>
Wed, 29 Apr 2026 03:45:34 +0000 (03:45 +0000)
committer Ronen Friedman <rfriedma@redhat.com>
Thu, 21 May 2026 18:10:17 +0000 (18:10 +0000)
Periodically collect the total snap-trim
queue length across all PGs. Expose it through
OSDService::get_snap_trim_queue_total().

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/scrubber/osd_scrub.cc
src/osd/scrubber/osd_scrub_sched.h

index b281be2f3796e8feb546239f98d433deba083110..6e5e7b7b335d7ff3b08f429476237acd184a95cd 100644 (file)
@@ -6469,6 +6469,12 @@ void OSD::tick_without_osd_lock()
   }
 
   if (is_active()) {
+    constexpr uint_fast16_t snap_trim_scan_interval = 5;
+    if (++trim_queue_length_countdown >= snap_trim_scan_interval) {
+      trim_queue_length_countdown = 0;
+      service.snap_trim_queue_total =
+       service.calc_snap_trim_queue_total();
+    }
     service.get_scrub_services().initiate_scrub(service.is_recovery_active());
     service.promote_throttle_recalibrate();
     resume_creating_pg();
@@ -7890,6 +7896,22 @@ std::optional<PGLockWrapper> OSDService::get_locked_pg(spg_t pgid)
 }
 
 
+uint64_t OSDService::calc_snap_trim_queue_total()  // sums get_snap_trimq_size() over the PGs that report is_primary()
+{
+  std::vector<spg_t> pgids;
+  osd->_get_pgids(&pgids);  // presumably fills pgids with the PGs hosted by this OSD - confirm in OSD::_get_pgids()
+  uint64_t total = 0;
+  for (auto& pgid : pgids) {
+    if (auto locked_pg = get_locked_pg(pgid)) {  // get_locked_pg() returns std::optional<PGLockWrapper>; an empty result is skipped
+      const auto& pg = locked_pg->pg();
+      if (pg->is_primary()) {  // PGs that do not report is_primary() contribute nothing
+       total += pg->get_snap_trimq_size();
+      }
+    }  // locked_pg goes out of scope here, once per PG (presumably releasing that PG's lock - confirm in PGLockWrapper)
+  }
+  return total;
+}
+
 MPGStats* OSD::collect_pg_stats()
 {
   dout(15) << __func__ << dendl;
index 3b3e7092650d4e3a2be2351fee2aec1a1e78d029..8bc7a7f48e05b055187f4f637d596d38b0d8c4bf 100644 (file)
@@ -152,6 +152,18 @@ public:
   }
 
   int get_nodeid() const final { return whoami; }
+
+  /// iterate over all PGs, summing their snap trim queue lengths
+  uint64_t calc_snap_trim_queue_total();
+  uint64_t get_snap_trim_queue_total() const {
+    // cached value: the latest calc_snap_trim_queue_total() result stored by OSD::tick_without_osd_lock(); no PG is scanned here
+    return snap_trim_queue_total;
+  }
+
+  // not atomic: both write (tick_without_osd_lock) and read (initiate_scrub,
+  // called from the same timer callback) are on the same thread
+  uint64_t snap_trim_queue_total{0};
+
 private:
   OSDMapRef osdmap;
 
@@ -1240,6 +1252,7 @@ class OSD : public Dispatcher,
   // Tick timer for those stuff that do not need osd_lock
   ceph::mutex tick_timer_lock = ceph::make_mutex("OSD::tick_timer_lock");
   SafeTimer tick_timer_without_osd_lock;
+  uint_fast16_t trim_queue_length_countdown = 0;
   std::string gss_ktfile_client{};
 
 public:
index 11e9756af20a447a0bb86393939c7eb13fc70d34..f8e2e40a31eab982caa2da0bd96b57f1f7881805 100644 (file)
@@ -107,6 +107,11 @@ void OsdScrub::initiate_scrub(bool is_recovery_active)
        << dendl;
   }
 
+  {
+    auto stqt = m_osd_svc.get_snap_trim_queue_total();
+    dout(20) << fmt::format("snap_trim_queue_total: {}", stqt) << dendl;
+  }
+
   const utime_t scrub_time = ceph_clock_now();
 
   // check the OSD-wide environment conditions (scrub resources, time, etc.).
index 929bdc17f8632daeadb4ea01a5a9d3af99ccf06a..92140ba521977d7f1721e02eae215c48595366a5 100644 (file)
@@ -140,6 +140,8 @@ class ScrubSchedListener {
    */
   virtual AsyncReserver<spg_t, Finisher>& get_scrub_reserver() = 0;
 
+  virtual uint64_t get_snap_trim_queue_total() const = 0;
+
   virtual ~ScrubSchedListener() {}
 };