From b2bfd6cc0ba697414a0abca1bb8a1074abb08993 Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Thu, 18 Feb 2016 15:33:19 +0800 Subject: [PATCH] common/TrackedOp: fix inaccurate counting for total slow requests In the original design there are two counters in charge of collecting potentially problematic requests, namely 'slow' and 'warned'. Counter 'slow' is responsible for capturing all the requests which have already hit the "complaint" limit and shall be marked as "slow", while counter 'warned' is responsible for counting those requests which have already hit the "warning interval" and thus shall be logged and then output. The problem here is that if the 'warned' counter hits the log_threshold, we quit the entire for loop, but there may be residual shard_queues which still contain slow requests. As a result, the 'slow' counter does not reflect the real number of total slow requests in all the shard_queues in this case. In particular, no slow requests will be tracked at all when 'log_threshold' is set to zero. (Is this intentional? If not, we should never allow 'log_threshold' to be zero.) The solution for the above problem is to keep counting 'slow' requests until we have finished traversing all the shard_queues, no matter whether we have gathered enough requests for logging or not; once we have gathered enough, we simply stop incrementing the 'warned' counter and skip the logging process. 
Fixes: #14804 Signed-off-by: xie xingguo --- src/common/TrackedOp.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index f76587e7b1676..39a13b4c2832d 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -203,8 +203,7 @@ bool OpTracker::check_ops_in_flight(std::vector &warning_vector) int slow = 0; // total slow int warned = 0; // total logged - for (uint32_t iter = 0; - iter < num_optracker_shards && warned < log_threshold; iter++) { + for (uint32_t iter = 0; iter < num_optracker_shards; iter++) { ShardedTrackingData* sdata = sharded_in_flight_list[iter]; assert(NULL != sdata); Mutex::Locker locker(sdata->ops_in_flight_lock_sharded); @@ -215,12 +214,10 @@ bool OpTracker::check_ops_in_flight(std::vector &warning_vector) slow++; // exponential backoff of warning intervals - if (((*i)->get_initiated() + - (complaint_time * (*i)->warn_interval_multiplier)) < now) { - // will warn + if (warned < log_threshold && + ((*i)->get_initiated() + (complaint_time * (*i)->warn_interval_multiplier)) < now) { + // will warn, increase counter warned++; - if (warned > log_threshold) - break; utime_t age = now - (*i)->get_initiated(); stringstream ss; -- 2.39.5