git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
TrackedOp: specify queue sizes and warnings on a per-tracker basis
author Greg Farnum <greg@inktank.com>
Thu, 3 Oct 2013 00:12:06 +0000 (17:12 -0700)
committer Greg Farnum <greg@inktank.com>
Thu, 3 Oct 2013 22:18:44 +0000 (15:18 -0700)
If we have multiple trackers in a daemon, we want to be able to configure
them separately. Plus, users already know how to control op sizes in the
OSD, so changing the config options (as we did in a8bbb81b7b7b6420ea08bc4e99a39adc6c3c397a)
is not really appropriate. Instead, provide setters which can be called
at construction time (or on any other change) and use them in the OSD with
the configurables we had previously. Add an observer so you can continue
to change them at run-time.

This reverts a8bbb81b7b7b6420ea08bc4e99a39adc6c3c397a

Signed-off-by: Greg Farnum <greg@inktank.com>
src/common/TrackedOp.cc
src/common/TrackedOp.h
src/common/config_opts.h
src/osd/OSD.cc

index 82594a6491e641ada7458deb48a43b3731a6fe4c..d1dbc1e71354cb8975b95ac182db46cda6e47355 100644 (file)
@@ -48,14 +48,14 @@ void OpHistory::cleanup(utime_t now)
 {
   while (arrived.size() &&
         (now - arrived.begin()->first >
-         (double)(tracker->cct->_conf->op_tracker_history_duration))) {
+         (double)(history_duration))) {
     duration.erase(make_pair(
        arrived.begin()->second->get_duration(),
        arrived.begin()->second));
     arrived.erase(arrived.begin());
   }
 
-  while (duration.size() > tracker->cct->_conf->op_tracker_history_size) {
+  while (duration.size() > history_size) {
     arrived.erase(make_pair(
        duration.begin()->second->get_arrived(),
        duration.begin()->second));
@@ -67,8 +67,8 @@ void OpHistory::dump_ops(utime_t now, Formatter *f)
 {
   cleanup(now);
   f->open_object_section("OpHistory");
-  f->dump_int("num to keep", tracker->cct->_conf->op_tracker_history_size);
-  f->dump_int("duration to keep", tracker->cct->_conf->op_tracker_history_duration);
+  f->dump_int("num to keep", history_size);
+  f->dump_int("duration to keep", history_duration);
   {
     f->open_array_section("Ops");
     for (set<pair<utime_t, TrackedOpRef> >::const_iterator i =
@@ -132,7 +132,7 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector)
 
   utime_t now = ceph_clock_now(cct);
   utime_t too_old = now;
-  too_old -= cct->_conf->op_tracker_complaint_time;
+  too_old -= complaint_time;
 
   utime_t oldest_secs = now - ops_in_flight.front()->get_arrived();
 
@@ -140,11 +140,11 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector)
            << "; oldest is " << oldest_secs
            << " seconds old" << dendl;
 
-  if (oldest_secs < cct->_conf->op_tracker_complaint_time)
+  if (oldest_secs < complaint_time)
     return false;
 
   xlist<TrackedOp*>::iterator i = ops_in_flight.begin();
-  warning_vector.reserve(cct->_conf->op_tracker_log_threshold + 1);
+  warning_vector.reserve(log_threshold + 1);
 
   int slow = 0;     // total slow
   int warned = 0;   // total logged
@@ -153,13 +153,12 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector)
 
     // exponential backoff of warning intervals
     if (((*i)->get_arrived() +
-        (cct->_conf->op_tracker_complaint_time *
-         (*i)->warn_interval_multiplier)) < now) {
+        (complaint_time * (*i)->warn_interval_multiplier)) < now) {
       // will warn
       if (warning_vector.empty())
        warning_vector.push_back("");
       warned++;
-      if (warned > cct->_conf->op_tracker_log_threshold)
+      if (warned > log_threshold)
         break;
 
       utime_t age = now - (*i)->get_arrived();
index 2fe9eeb230c1e60b5f5a11e035817daa39db6762..9e00c14b1784a33f786e160fc0b9451d3d9ac7f3 100644 (file)
@@ -32,9 +32,12 @@ class OpHistory {
   void cleanup(utime_t now);
   bool shutdown;
   OpTracker *tracker;
+  uint32_t history_size;
+  uint32_t history_duration;
 
 public:
-  OpHistory(OpTracker *tracker_) : shutdown(false), tracker(tracker_) {}
+  OpHistory(OpTracker *tracker_) : shutdown(false), tracker(tracker_),
+  history_size(0), history_duration(0) {}
   ~OpHistory() {
     assert(arrived.empty());
     assert(duration.empty());
@@ -42,6 +45,10 @@ public:
   void insert(utime_t now, TrackedOpRef op);
   void dump_ops(utime_t now, Formatter *f);
   void on_shutdown();
+  void set_size_and_duration(uint32_t new_size, uint32_t new_duration) {
+    history_size = new_size;
+    history_duration = new_duration;
+  }
 };
 
 class OpTracker {
@@ -57,10 +64,20 @@ class OpTracker {
   Mutex ops_in_flight_lock;
   xlist<TrackedOp *> ops_in_flight;
   OpHistory history;
+  float complaint_time;
+  int log_threshold;
 
 public:
   CephContext *cct;
-  OpTracker(CephContext *cct_) : seq(0), ops_in_flight_lock("OpTracker mutex"), history(this), cct(cct_) {}
+  OpTracker(CephContext *cct_) : seq(0), ops_in_flight_lock("OpTracker mutex"),
+      history(this), complaint_time(0), log_threshold(0), cct(cct_) {}
+  void set_complaint_and_threshold(float time, int threshold) {
+    complaint_time = time;
+    log_threshold = threshold;
+  }
+  void set_history_size_and_duration(uint32_t new_size, uint32_t new_duration) {
+    history.set_size_and_duration(new_size, new_duration);
+  }
   void dump_ops_in_flight(Formatter *f);
   void dump_historic_ops(Formatter *f);
   void register_inflight_op(xlist<TrackedOp*>::item *i);
index 521176c4672651c1f053654effeb029cdf55f576..f62832396602da7ca177aa5b8ca19facdc3f6e9d 100644 (file)
@@ -471,9 +471,9 @@ OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 200)
 
 OPTION(osd_min_pg_log_entries, OPT_U32, 3000)  // number of entries to keep in the pg log when trimming it
 OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
-OPTION(op_tracker_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
+OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
 OPTION(osd_command_max_records, OPT_INT, 256)
-OPTION(op_tracker_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
+OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
 OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false)  // read fiemap-reported holes and verify they are zeros
 OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0)
 OPTION(osd_debug_drop_ping_duration, OPT_INT, 0)
@@ -484,8 +484,8 @@ OPTION(osd_debug_op_order, OPT_BOOL, false)
 OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false)
 OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false)
 OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
-OPTION(op_tracker_history_size, OPT_U32, 20)    // Max number of completed ops to track
-OPTION(op_tracker_history_duration, OPT_U32, 600) // Oldest completed op to track
+OPTION(osd_op_history_size, OPT_U32, 20)    // Max number of completed ops to track
+OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
 OPTION(osd_target_transaction_size, OPT_INT, 30)     // to adjust various transactions that batch smaller items
 OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
 OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe)
@@ -510,7 +510,7 @@ OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
  * 1..63.
  *
  * osd_recovery_op_warn_multiple scales the normal warning threshhold,
- * op_tracker_complaint_time, so that slow recovery ops won't cause noise
+ * osd_op_complaint_time, so that slow recovery ops won't cause noise
  */
 OPTION(osd_client_op_priority, OPT_U32, 63)
 OPTION(osd_recovery_op_priority, OPT_U32, 10)
index 8150d9b3cd98d705d5da9aa1309f38e4a1649fba..1ba35ec2ef5d5f981ce555f2aa2109a79fa95e93 100644 (file)
@@ -979,6 +979,10 @@ OSD::OSD(CephContext *cct_, int id, Messenger *internal_messenger, Messenger *ex
   service(this)
 {
   monc->set_messenger(client_messenger);
+  op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time,
+                                         cct->_conf->osd_op_log_threshold);
+  op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size,
+                                           cct->_conf->osd_op_history_duration);
 }
 
 OSD::~OSD()
@@ -2560,7 +2564,7 @@ void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat)
 
   if (cur_state != new_state) {
     cur_state = new_state;
-  } else if (now - last_msg < cct->_conf->op_tracker_complaint_time) {
+  } else if (now - last_msg < cct->_conf->osd_op_complaint_time) {
     return;
   }
   last_msg = now;
@@ -7318,6 +7322,8 @@ const char** OSD::get_tracked_conf_keys() const
 {
   static const char* KEYS[] = {
     "osd_max_backfills",
+    "osd_op_complaint_time", "osd_op_log_threshold",
+    "osd_op_history_size", "osd_op_history_duration",
     NULL
   };
   return KEYS;
@@ -7330,6 +7336,16 @@ void OSD::handle_conf_change(const struct md_config_t *conf,
     service.local_reserver.set_max(cct->_conf->osd_max_backfills);
     service.remote_reserver.set_max(cct->_conf->osd_max_backfills);
   }
+  if (changed.count("osd_op_complaint_time") ||
+      changed.count("osd_op_log_threshold")) {
+    op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time,
+                                           cct->_conf->osd_op_log_threshold);
+  }
+  if (changed.count("osd_op_history_size") ||
+      changed.count("osd_op_history_duration")) {
+    op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size,
+                                             cct->_conf->osd_op_history_duration);
+  }
 }
 
 // --------------------------------