]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Restrict logging in MissingLoc::add_source_info
authorBill Scales <bill_scales@uk.ibm.com>
Fri, 21 Nov 2025 10:38:44 +0000 (10:38 +0000)
committerBill Scales <bill_scales@uk.ibm.com>
Thu, 27 Nov 2025 07:45:07 +0000 (07:45 +0000)
add_source_info can generate an excessive amount of logging
if a PG has thousands of missing objects. When a system is
under load and threads are repeatedly context switching this
can lead to timeouts (tests showed this function taking up
to 10 seconds to execute with 99% of that time being in
logging calls where the thread was being pre-empted).
Stopping logging after the function has been running for
more than 0.5 seconds strikes a balance between providing
sufficient informtion to debug problems while providing
more stability when a system is heavily loaded.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
src/osd/MissingLoc.cc

index 211b3046de79e2a7e39908ea4a487e181b6aa366..e456a497d5b5edfc615d2c1cc94fca50eb6501ce 100644 (file)
@@ -75,44 +75,65 @@ bool MissingLoc::add_source_info(
   unsigned loop = 0;
   bool sources_updated = false;
   // found items?
+
+  auto start = ceph::coarse_mono_clock::now();
+  bool suppress_logging = false;
   for (auto p = needs_recovery_map.begin();
        p != needs_recovery_map.end();
        ++p) {
     const hobject_t &soid(p->first);
     eversion_t need = p->second.need;
-    if (handle && ++loop >= cct->_conf->osd_loop_before_reset_tphandle) {
-      handle->reset_tp_timeout();
+    if (++loop >= cct->_conf->osd_loop_before_reset_tphandle) {
+      if (handle) {
+       handle->reset_tp_timeout();
+      }
+      auto now = ceph::coarse_mono_clock::now();
+      if (std::chrono::duration<double>(now - start).count() > 0.5) {
+       // Suppress logging if this function has be running more than
+       // 0.5 seconds
+       suppress_logging = true;
+      }
       loop = 0;
     }
     if (p->second.is_delete()) {
-      ldout(cct, 10) << __func__ << " " << soid
-                    << " delete, ignoring source" << dendl;
+      if (!suppress_logging) {
+        ldout(cct, 10) << __func__ << " " << soid
+                       << " delete, ignoring source" << dendl;
+      }
       continue;
     }
     if (oinfo.last_update < need) {
-      ldout(cct, 10) << "search_for_missing " << soid << " " << need
-                    << " also missing on osd." << fromosd
-                    << " (last_update " << oinfo.last_update
-                    << " < needed " << need << ")" << dendl;
+      if (!suppress_logging) {
+        ldout(cct, 10) << "search_for_missing " << soid << " " << need
+                       << " also missing on osd." << fromosd
+                       << " (last_update " << oinfo.last_update
+                       << " < needed " << need << ")" << dendl;
+      }
       continue;
     }
     if (p->first >= oinfo.last_backfill) {
       // FIXME: this is _probably_ true, although it could conceivably
       // be in the undefined region!  Hmm!
-      ldout(cct, 10) << "search_for_missing " << soid << " " << need
-                    << " also missing on osd." << fromosd
-                    << " (past last_backfill " << oinfo.last_backfill
-                    << ")" << dendl;
+      if (!suppress_logging) {
+        ldout(cct, 10) << "search_for_missing " << soid << " " << need
+                       << " also missing on osd." << fromosd
+                       << " (past last_backfill " << oinfo.last_backfill
+                       << ")" << dendl;
+      }
       continue;
     }
     if (omissing.is_missing(soid)) {
-      ldout(cct, 10) << "search_for_missing " << soid << " " << need
-                    << " also missing on osd." << fromosd << dendl;
+      if (!suppress_logging) {
+        ldout(cct, 10) << "search_for_missing " << soid << " " << need
+                       << " also missing on osd." << fromosd << dendl;
+      }
       continue;
     }
 
-    ldout(cct, 10) << "search_for_missing " << soid << " " << need
-                  << " is on osd." << fromosd << dendl;
+    if (!suppress_logging) {
+      ldout(cct, 10) << "search_for_missing " << soid << " " << need
+                     << " is on osd." << fromosd << dendl;
+    }
 
     {
       auto p = missing_loc.find(soid);
@@ -133,7 +154,7 @@ bool MissingLoc::add_source_info(
   }
 
   ldout(cct, 20) << "needs_recovery_map missing " << needs_recovery_map
-                << dendl;
+                 << dendl;
   return found_missing;
 }