]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: add debug logs for snaptrim and scrub background_process_lock 69240/head
authorAishwarya Mathuria <amathuri@redhat.com>
Tue, 2 Jun 2026 13:37:50 +0000 (19:07 +0530)
committerAishwarya Mathuria <amathuri@redhat.com>
Wed, 3 Jun 2026 07:17:10 +0000 (12:47 +0530)
Add targeted debug logs to help diagnose snaptrim stalls under thrash/pggrow

Signed-off-by: Aishwarya Mathuria <amathuri@redhat.com>
src/crimson/osd/osd_operations/scrub_events.cc
src/crimson/osd/osd_operations/snaptrim_event.cc
src/crimson/osd/scrub/pg_scrubber.cc

index 2403c3b961607eeb64ff8853eaadbbc9069ef359..196054542b03c1f476f6068757f98e548c11a49a 100644 (file)
@@ -109,6 +109,7 @@ ScrubReserveRange::ifut<> ScrubReserveRange::run(PG &pg)
 {
   LOG_PREFIX(ScrubReserveRange::run);
   DEBUGDPP("", pg);
+  DEBUGDPP("waiting for pg background_process_lock", pg);
   return pg.background_process_lock.lock(
   ).then_interruptible([FNAME, this, &pg] {
     DEBUGDPP("pg_background_io_mutex locked", pg);
@@ -130,8 +131,9 @@ ScrubReserveRange::ifut<> ScrubReserveRange::run(PG &pg)
       return scrubber.machine.process_event(
        scrub::ScrubContext::reserve_range_complete_t{p->version});
     }
-  }).finally([&pg, this] {
+  }).finally([FNAME, &pg, this] {
     if (!blocked_set) {
+      DEBUGDPP("releasing pg background_process_lock (reserve not set)", pg);
       pg.background_process_lock.unlock();
     }
   });
index c222f506a8f84053f2660af3d0f7dffeacc45d36..94a2dfd7a7a78d2fe59d65410d9b0096fa634997 100644 (file)
@@ -78,8 +78,11 @@ SnapTrimEvent::start()
    * pipeline stages. https://tracker.ceph.com/issues/66473 */
   ShardServices &shard_services = pg->get_shard_services();
   {
+    logger().debug("{}: waiting for pg background_process_lock", *this);
     co_await pg->background_process_lock.lock_with_op(*this);
+    logger().debug("{}: acquired pg background_process_lock", *this);
     auto unlocker = seastar::defer([this] {
+      logger().debug("{}: releasing pg background_process_lock", *this);
       pg->background_process_lock.unlock();
     });
 
@@ -100,6 +103,8 @@ SnapTrimEvent::start()
     });
     auto to_trim = co_await std::move(to_trim_fut);
 
+    logger().debug("{}: snap_mapper returned {} object(s) for this batch",
+                   *this, to_trim.size());
     if (to_trim.empty()) {
       // the legit ENOENT -> done
       logger().debug("{}: to_trim is empty! Stopping iteration", *this);
@@ -115,8 +120,9 @@ SnapTrimEvent::start()
          snapid));
     }
 
-    logger().debug("{}: awaiting completion", *this);
+    logger().debug("{}: awaiting completion ({} subevent(s))", *this, to_trim.size());
     co_await subop_blocker.interruptible_wait_completion();
+    logger().debug("{}: all subevents completed this round", *this);
   }
 
   if (needs_pause) {
@@ -405,6 +411,7 @@ SnapTrimObjSubEvent::start()
     std::ignore = handle.complete().then([opref = std::move(opref)] {});
   });
 
+  logger().debug("{}: entering obc pipeline process", *this);
   co_await enter_stage<interruptor>(
     obc_orderer->obc_pp().process);
 
@@ -428,11 +435,14 @@ SnapTrimObjSubEvent::start()
   auto all_completed = interruptor::now();
   {
     auto unlocker = seastar::defer([this] {
+      logger().debug("{}: releasing submit_lock", *this);
       pg->submit_lock.unlock();
     });
     // as with PG::submit_executer, we need to build the pg log entries
     // and submit the transaction atomically
+    logger().debug("{}: acquiring submit_lock", *this);
     co_await interruptor::make_interruptible(pg->submit_lock.lock());
+    logger().debug("{}: submit_lock held", *this);
 
     logger().debug("{}: calling remove_or_update obc={}",
                   *this, obc_manager.get_obc()->get_oid());
@@ -448,11 +458,14 @@ SnapTrimObjSubEvent::start()
       std::move(osd_op_p),
       std::move(log_entries)
     );
+    logger().debug("{}: transaction submitted, waiting local apply", *this);
     co_await std::move(submitted);
   }
 
+  logger().debug("{}: entering wait_repop", *this);
   co_await enter_stage<interruptor>(obc_orderer->obc_pp().wait_repop);
 
+  logger().debug("{}: waiting repop completion future", *this);
   co_await std::move(all_completed);
 
   logger().debug("{}: completed", *this);
index 128e28671c922853e51061ff687958cde28921f9..30f22cb738e8cce85fd2de69ddc1904a76717744 100644 (file)
@@ -188,7 +188,8 @@ void PGScrubber::release_range()
     DEBUGDPP("range not reserved, skipping", pg);
     return;
   }
-  DEBUGDPP("blocked: {}", pg, *blocked);
+  DEBUGDPP("blocked: {}, releasing pg background_process_lock (range {} .. {})",
+          pg, *blocked, blocked->begin, blocked->end);
   pg.background_process_lock.unlock();
   blocked->p.set_value();
   blocked = std::nullopt;