git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: data loss: low space handling
author: David Zafman <david.zafman@inktank.com>
Wed, 13 Mar 2013 03:49:25 +0000 (20:49 -0700)
committer: David Zafman <david.zafman@inktank.com>
Thu, 14 Mar 2013 17:20:03 +0000 (10:20 -0700)
Add check whether to allow writing ops based on failsafe full percentage
Check for failsafe nearfull warning or full error message every heartbeat
Use clock to limit messages to every 30 secs (osd_op_complaint_time)

Feature: #4197

Signed-off-by: David Zafman <david.zafman@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
src/common/config_opts.h
src/osd/OSD.cc
src/osd/OSD.h

index 811b8ae11422e58dd60ef2f3f009902dd4fb7dd0..5d3c64e45f20618c0b5c1eb34808046650e2d4dd 100644 (file)
@@ -413,6 +413,8 @@ OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false)
 OPTION(osd_op_history_size, OPT_U32, 20)    // Max number of completed ops to track
 OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
 OPTION(osd_target_transaction_size, OPT_INT, 30)     // to adjust various transactions that batch smaller items
+OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // failsafe "full" threshold (used/total); above it the OSD answers writes with ENOSPC. Values > 1.0 are read as percentages
+OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // failsafe "near full" threshold (used/total); above it the OSD logs a warning. Values > 1.0 are read as percentages
 
 /**
  * osd_client_op_priority and osd_recovery_op_priority adjust the relative
index c1619799fc2ba484ed98447c63476eaf6cea1eaf..16edb3c0739314de0b22f89bd2e56e292805143c 100644 (file)
@@ -181,7 +181,10 @@ OSDService::OSDService(OSD *osd) :
   map_cache(g_conf->osd_map_cache_size),
   map_bl_cache(g_conf->osd_map_cache_size),
   map_bl_inc_cache(g_conf->osd_map_cache_size),
-  in_progress_split_lock("OSDService::in_progress_split_lock")
+  in_progress_split_lock("OSDService::in_progress_split_lock"),
+  full_status_lock("OSDService::full_status_lock"),
+  cur_state(NONE),
+  last_msg(0)
 {}
 
 void OSDService::_start_split(const set<pg_t> &pgs)
@@ -1968,6 +1971,60 @@ void OSD::project_pg_history(pg_t pgid, pg_history_t& h, epoch_t from,
 
 // -------------------------------------
 
+float OSDService::get_full_ratio()
+{
+  // Failsafe "full" threshold as a fraction; a configured value above 1.0 is taken as a percentage.
+  const float configured = g_conf->osd_failsafe_full_ratio;
+  return (configured > 1.0) ? configured / 100.0 : configured;
+}
+
+float OSDService::get_nearfull_ratio()
+{
+  // Failsafe "near full" threshold as a fraction; a configured value above 1.0 is taken as a percentage.
+  const float configured = g_conf->osd_failsafe_nearfull_ratio;
+  return (configured > 1.0) ? configured / 100.0 : configured;
+}
+
+void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat)
+{
+  // Classify usage against the failsafe thresholds, record the state, and
+  // log at most once per osd_op_complaint_time unless the state just changed.
+  Mutex::Locker guard(full_status_lock);
+  const time_t now = ceph_clock_gettime(NULL);
+
+  const float used_ratio = ((float)osd_stat.kb_used) / ((float)osd_stat.kb);
+  const float near_limit = get_nearfull_ratio();
+  const float full_limit = get_full_ratio();
+
+  // A limit of zero or below disables the corresponding check.
+  const bool is_full = full_limit > 0 && used_ratio > full_limit;
+  const bool is_near = !is_full && near_limit > 0 && used_ratio > near_limit;
+  if (!is_full && !is_near) {
+    cur_state = NONE;
+    return;
+  }
+  const enum s_names new_state = is_full ? FULL : NEAR;
+
+  // Unchanged state: stay quiet until the complaint interval has elapsed.
+  const bool changed = (cur_state != new_state);
+  cur_state = new_state;
+  if (!changed && now - last_msg < g_conf->osd_op_complaint_time)
+    return;
+  last_msg = now;
+  if (new_state == FULL)
+    clog.error() << "OSD full dropping all updates " << (int)(used_ratio * 100) << "% full";
+  else
+    clog.warn() << "OSD near full (" << (int)(used_ratio * 100) << "%)";
+}
+
+bool OSDService::check_failsafe_full()
+{
+  // True when the state last recorded by check_nearfull_warning() is FULL;
+  // cur_state is read while holding full_status_lock.
+  Mutex::Locker guard(full_status_lock);
+  return cur_state == FULL;
+}
+
 void OSD::update_osd_stat()
 {
   // fill in osd stats too
@@ -1983,6 +2040,8 @@ void OSD::update_osd_stat()
     osd_stat.hb_in.push_back(p->first);
   osd_stat.hb_out.clear();
 
+  service.check_nearfull_warning(osd_stat);
+
   dout(20) << "update_osd_stat " << osd_stat << dendl;
 }
 
@@ -5948,7 +6007,8 @@ void OSD::handle_op(OpRequestRef op)
 
   if (op->may_write()) {
     // full?
-    if (osdmap->test_flag(CEPH_OSDMAP_FULL) &&
+    if ((service.check_failsafe_full() ||
+                 osdmap->test_flag(CEPH_OSDMAP_FULL)) &&
        !m->get_source().is_mds()) {  // FIXME: we'll exclude mds writes for now.
       service.reply_op_error(op, -ENOSPC);
       return;
index 1ee2750c44487694078451ed6330af0e6a4ac686..1ce80e1414e12b39798c546e9e0fbaab5bfbd5d3 100644 (file)
@@ -397,6 +397,15 @@ public:
   void expand_pg_num(OSDMapRef old_map,
                     OSDMapRef new_map);
 
+  // -- OSD Full Status --
+  Mutex full_status_lock;  // held while check_nearfull_warning()/check_failsafe_full() touch the state below
+  enum s_names { NONE, NEAR, FULL } cur_state;  // failsafe fullness state last computed by check_nearfull_warning()
+  time_t last_msg;  // time the last near-full/full message was logged
+  float get_full_ratio();  // osd_failsafe_full_ratio; values > 1.0 are divided by 100
+  float get_nearfull_ratio();  // osd_failsafe_nearfull_ratio; values > 1.0 are divided by 100
+  void check_nearfull_warning(const osd_stat_t &stat);  // recompute cur_state from stat and log, rate-limited
+  bool check_failsafe_full();  // true when cur_state is FULL
+
   OSDService(OSD *osd);
 };
 class OSD : public Dispatcher,