git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: restructure and simplify internal fullness checks
author Sage Weil <sage@redhat.com>
Thu, 23 Feb 2017 20:55:35 +0000 (15:55 -0500)
committer Sage Weil <sage@redhat.com>
Mon, 6 Mar 2017 22:21:21 +0000 (17:21 -0500)
First, eliminate the useless nearfull failsafe--all it did was
generate a log message, which we can do based on the OSDMap
states.

Add some new helpers.

Unify the cluster nearfull/full vs failsafe states so that
failsafe is a "really" full state that is more severe than
full, so we have NONE, NEARFULL, FULL, FAILSAFE.

Pull the full/nearfull ratios out of the OSDMap (remember that
we require luminous mons, so these will be initialized).

Signed-off-by: Sage Weil <sage@redhat.com>
src/common/config_opts.h
src/osd/OSD.cc
src/osd/OSD.h

index 0cbec2006d3bc44f613de6e4ebaeef58ec564134..ef4dc9ebceba9dbced29b92360726d04a49ae936 100644 (file)
@@ -875,7 +875,6 @@ OPTION(osd_op_history_size, OPT_U32, 20)    // Max number of completed ops to tr
 OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
 OPTION(osd_target_transaction_size, OPT_INT, 30)     // to adjust various transactions that batch smaller items
 OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
-OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe)
 OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections
 
 OPTION(osd_pg_object_context_cache_count, OPT_INT, 64)
index d1df4799b88a35e877deccd3a98ae93b1944c798..5c2cf7975024d4e7bbf9a57700713f79bdefd4e4 100644 (file)
@@ -278,7 +278,6 @@ OSDService::OSDService(OSD *osd) :
   stat_lock("OSDService::stat_lock"),
   full_status_lock("OSDService::full_status_lock"),
   cur_state(NONE),
-  last_msg(0),
   cur_ratio(0),
   epoch_lock("OSDService::epoch_lock"),
   boot_epoch(0), up_epoch(0), bind_epoch(0),
@@ -707,19 +706,9 @@ float OSDService::get_failsafe_full_ratio()
   return full_ratio;
 }
 
-float OSDService::get_failsafe_nearfull_ratio()
-{
-  float nearfull_ratio = cct->_conf->osd_failsafe_nearfull_ratio;
-  if (nearfull_ratio > 1.0) nearfull_ratio /= 100.0;
-  return nearfull_ratio;
-}
-
-void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat)
+void OSDService::check_full_status(const osd_stat_t &osd_stat)
 {
   Mutex::Locker l(full_status_lock);
-  enum s_names new_state;
-
-  time_t now = ceph_clock_gettime();
 
   // We base ratio on kb_avail rather than kb_used because they can
   // differ significantly e.g. on btrfs volumes with a large number of
@@ -728,39 +717,80 @@ void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat)
   // much space is available to use than how much we've already used.
   float ratio = ((float)(osd_stat.kb - osd_stat.kb_avail)) /
     ((float)osd_stat.kb);
-  float nearfull_ratio = get_failsafe_nearfull_ratio();
-  float full_ratio = get_failsafe_full_ratio();
   cur_ratio = ratio;
 
-  if (full_ratio > 0 && ratio > full_ratio) {
-    new_state = FULL;
-  } else if (nearfull_ratio > 0 && ratio > nearfull_ratio) {
-    new_state = NEAR;
-  } else {
+  // The OSDMap ratios take precedence.  So if the failsafe is .95 and
+  // the admin sets the cluster full to .96, the failsafe moves up to .96
+  // too.  (Not that having failsafe == full is ideal, but it's better than
+  // dropping writes before the cluster appears full.)
+  OSDMapRef osdmap = get_osdmap();
+  if (!osdmap || osdmap->get_epoch() == 0) {
     cur_state = NONE;
     return;
   }
+  float nearfull_ratio = osdmap->get_nearfull_ratio();
+  float full_ratio = std::max(osdmap->get_full_ratio(), nearfull_ratio);
+  float failsafe_ratio = std::max(get_failsafe_full_ratio(), full_ratio);
 
+  if (full_ratio <= 0 ||
+      nearfull_ratio <= 0) {
+    derr << __func__ << " full_ratio or nearfull_ratio is <= 0" << dendl;
+    cur_state = NONE;
+    return;
+  }
+
+  enum s_names new_state;
+  if (ratio > failsafe_ratio) {
+    new_state = FAILSAFE;
+  } else if (ratio > full_ratio) {
+    new_state = FULL;
+  } else if (ratio > nearfull_ratio) {
+    new_state = NEARFULL;
+  } else {
+    new_state = NONE;
+  }
+  dout(20) << __func__ << " cur ratio " << ratio
+          << ". nearfull_ratio " << nearfull_ratio
+          << ", full_ratio " << full_ratio
+          << ", failsafe_ratio " << failsafe_ratio
+          << ", new state " << get_full_state_name(new_state)
+          << dendl;
+
+  // warn
   if (cur_state != new_state) {
+    dout(10) << __func__ << " " << get_full_state_name(cur_state)
+            << " -> " << get_full_state_name(new_state) << dendl;
+    if (new_state == FAILSAFE) {
+      clog->error() << "failsafe engaged, dropping updates, now "
+                   << (int)roundf(ratio * 100) << "% full";
+    } else if (cur_state == FAILSAFE) {
+      clog->error() << "failsafe disengaged, no longer dropping updates, now "
+                   << (int)roundf(ratio * 100) << "% full";
+    }
     cur_state = new_state;
-  } else if (now - last_msg < cct->_conf->osd_op_complaint_time) {
-    return;
   }
-  last_msg = now;
-  if (cur_state == FULL)
-    clog->error() << "OSD full dropping all updates " << (int)roundf(ratio * 100) << "% full";
-  else
-    clog->warn() << "OSD near full (" << (int)roundf(ratio * 100) << "%)";
 }
 
 bool OSDService::check_failsafe_full()
 {
   Mutex::Locker l(full_status_lock);
-  if (cur_state == FULL)
+  if (cur_state == FAILSAFE)
     return true;
   return false;
 }
 
+bool OSDService::is_nearfull()
+{
+  Mutex::Locker l(full_status_lock);
+  return cur_state == NEARFULL;
+}
+
+bool OSDService::is_full()
+{
+  Mutex::Locker l(full_status_lock);
+  return cur_state >= FULL;
+}
+
 bool OSDService::too_full_for_backfill(double *_ratio, double *_max_ratio)
 {
   Mutex::Locker l(full_status_lock);
@@ -801,9 +831,9 @@ void OSDService::update_osd_stat(vector<int>& hb_peers)
   osd->logger->set(l_osd_stat_bytes_used, used);
   osd->logger->set(l_osd_stat_bytes_avail, avail);
 
-  check_nearfull_warning(osd_stat);
-
   dout(20) << "update_osd_stat " << osd_stat << dendl;
+
+  check_full_status(osd_stat);
 }
 
 void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch)
index 6e16d633cbdd2d1bc4e5aa0bd09b126498cfe3f5..e305d7ef5e41eb07ee528918f8476ed909af4543 100644 (file)
@@ -1136,14 +1136,23 @@ public:
   // -- OSD Full Status --
 private:
   Mutex full_status_lock;
-  enum s_names { NONE, NEAR, FULL } cur_state;
-  time_t last_msg;
-  double cur_ratio;
+  enum s_names { NONE, NEARFULL, FULL, FAILSAFE } cur_state;  // ascending
+  const char *get_full_state_name(s_names s) {
+    switch (s) {
+    case NONE: return "none";
+    case NEARFULL: return "nearfull";
+    case FULL: return "full";
+    case FAILSAFE: return "failsafe";
+    default: return "???";
+    }
+  }
+  double cur_ratio;  ///< current utilization
   float get_failsafe_full_ratio();
-  float get_failsafe_nearfull_ratio();
-  void check_nearfull_warning(const osd_stat_t &stat);
+  void check_full_status(const osd_stat_t &stat);
 public:
   bool check_failsafe_full();
+  bool is_nearfull();
+  bool is_full();
   bool too_full_for_backfill(double *ratio, double *max_ratio);
 
   // -- epochs --