From 27d98d24194545a485aff1e94592c1bd45f6c2a2 Mon Sep 17 00:00:00 2001 From: Joao Eduardo Luis Date: Tue, 1 May 2012 19:55:30 +0100 Subject: [PATCH] OpRequest: only show a small set of the oldest messages, instead of all. Signed-off-by: Joao Eduardo Luis Reviewed-by: Greg Farnum --- src/common/config_opts.h | 1 + src/osd/OpRequest.cc | 42 +++++++++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 1d5ba22057d76..7b6d6f3c80cd3 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -325,6 +325,7 @@ OPTION(osd_kill_backfill_at, OPT_INT, 0) OPTION(osd_min_pg_log_entries, OPT_U32, 1000) // number of entries to keep in the pg log when trimming it OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy OPTION(osd_command_max_records, OPT_INT, 256) +OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go OPTION(filestore, OPT_BOOL, false) OPTION(filestore_debug_omap_check, OPT_BOOL, 0) // Expensive debugging check on sync // Use omap for xattrs for attrs over diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc index f10a3a944d847..5203b57407d40 100644 --- a/src/osd/OpRequest.cc +++ b/src/osd/OpRequest.cc @@ -3,6 +3,7 @@ #include "OpRequest.h" #include "common/Formatter.h" #include +#include #include "common/debug.h" #include "common/config.h" #include "msg/Message.h" @@ -74,24 +75,55 @@ bool OpTracker::check_ops_in_flight(std::vector &warning_vector) utime_t too_old = now; too_old -= g_conf->osd_op_complaint_time; + utime_t oldest_secs = now - ops_in_flight.front()->received_time; + dout(10) << "ops_in_flight.size: " << ops_in_flight.size() - << "; oldest is " << now - ops_in_flight.front()->received_time + << "; oldest is " << oldest_secs << " seconds old" << dendl; + xlist::iterator i = ops_in_flight.begin(); - warning_vector.reserve(ops_in_flight.size()); + warning_vector.reserve(g_conf->osd_op_log_threshold + 1); + warning_vector.push_back(""); + + int total = 0, skipped = 0; + while (!i.end() && (*i)->received_time < too_old) { // exponential backoff of warning intervals if ( ( (*i)->received_time + (g_conf->osd_op_complaint_time * (*i)->warn_interval_multiplier) )< now) { + + if (total++ >= g_conf->osd_op_log_threshold) + break; + stringstream ss; - ss << "old request " << *((*i)->request) << " received at " - << (*i)->received_time << " currently " << (*i)->state_string(); - (*i)->warn_interval_multiplier *= 2; + ss << "received at " << (*i)->received_time + << ": " << *((*i)->request) << " currently " << (*i)->state_string(); warning_vector.push_back(ss.str()); + // only those that have been shown will backoff + (*i)->warn_interval_multiplier *= 2; + } else { + skipped++; } ++i; } + + stringstream ss; + ss << "there are " << ops_in_flight.size() + << " ops in flight; the oldest is blocked for >" + << oldest_secs << " secs"; + + if (total) { + ss << "; these are "; + if (!skipped) + ss << "the " << warning_vector.size()-1 << " oldest:"; + else { + ss << warning_vector.size()-1 << " amongst the oldest (" + << skipped << " precedes them):"; + } + } + warning_vector[0] = ss.str(); + return warning_vector.size(); } -- 2.39.5