git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: add dump_stuck command
author Josh Durgin <josh.durgin@dreamhost.com>
Wed, 15 Feb 2012 01:52:36 +0000 (17:52 -0800)
committer Josh Durgin <josh.durgin@dreamhost.com>
Sat, 18 Feb 2012 00:34:35 +0000 (16:34 -0800)
This will help monitor transient pg states at a coarse level.

Fixes: #2005
Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
src/mon/PGMap.cc
src/mon/PGMap.h
src/mon/PGMonitor.cc
src/mon/PGMonitor.h

index 5baa793f948a4f270f7593209d1944285997532d..50a6d0072f70119dff1eebad98e0354a4dfb96b5 100644 (file)
@@ -489,6 +489,60 @@ void PGMap::dump(ostream& ss) const
      << std::endl;
 }
 
+void PGMap::get_stuck_stats(PGMap::StuckPG type, utime_t cutoff,
+                           hash_map<pg_t, pg_stat_t>& stuck_pgs) const
+{  // Copy into stuck_pgs every pg_stat entry whose type-specific timestamp is older than cutoff.
+  for (hash_map<pg_t, pg_stat_t>::const_iterator i = pg_stat.begin();
+       i != pg_stat.end();
+       ++i) {
+    utime_t val;  // timestamp compared against cutoff; the field used depends on type
+    switch (type) {
+    case STUCK_INACTIVE:
+      if (i->second.state & PG_STATE_ACTIVE)
+       continue;  // active bit is set, so this pg is not a candidate
+      val = i->second.last_active;
+      break;
+    case STUCK_UNCLEAN:
+      if (i->second.state & PG_STATE_CLEAN)
+       continue;  // clean bit is set, so this pg is not a candidate
+      val = i->second.last_clean;
+      break;
+    case STUCK_STALE:  // no state filter in this case: only last_fresh is examined
+      val = i->second.last_fresh;
+      break;
+    default:  // STUCK_NONE or any other value is treated as a caller error
+      assert(0 == "invalid type");
+    }
+
+    if (val < cutoff) {
+      stuck_pgs[i->first] = i->second;  // stuck_pgs is never cleared here; a same-pgid entry is overwritten
+    }
+  }
+}
+
+void PGMap::dump_stuck(Formatter *f, PGMap::StuckPG type, utime_t cutoff) const
+{  // Emit the pgs selected by get_stuck_stats(type, cutoff) through f as the array "stuck_pg_stats".
+  hash_map<pg_t, pg_stat_t> stuck_pg_stats;
+  get_stuck_stats(type, cutoff, stuck_pg_stats);
+  f->open_array_section("stuck_pg_stats");
+  for (hash_map<pg_t,pg_stat_t>::const_iterator i = stuck_pg_stats.begin();
+       i != stuck_pg_stats.end();
+       ++i) {
+    f->open_object_section("pg_stat");  // one "pg_stat" object per selected pg
+    f->dump_stream("pgid") << i->first;
+    i->second.dump(f);  // the remaining fields are written by pg_stat_t::dump
+    f->close_section();
+  }
+  f->close_section();  // closes the "stuck_pg_stats" array
+}
+
+void PGMap::dump_stuck_plain(ostream& ss, PGMap::StuckPG type, utime_t cutoff) const
+{  // Plain-text counterpart of dump_stuck: same selection, written to ss.
+  hash_map<pg_t, pg_stat_t> stuck_pg_stats;
+  get_stuck_stats(type, cutoff, stuck_pg_stats);
+  dump_pg_stats_plain(ss, stuck_pg_stats);  // formatting is delegated to dump_pg_stats_plain
+}
+
 void PGMap::state_summary(ostream& ss) const
 {
   for (hash_map<int,int>::const_iterator p = num_pg_by_state.begin();
index 1fe034853077f2b9151ad904e15d1fcd434974c5..90df77db8a826f1422b862cc61dddce9a173ee51 100644 (file)
@@ -71,6 +71,13 @@ public:
   osd_stat_t osd_sum;
 
   set<pg_t> creating_pgs;   // lru: front = new additions, back = recently pinged
+
+  enum StuckPG {  // selects which "stuck" criterion get_stuck_stats applies
+    STUCK_INACTIVE,  // pgs without PG_STATE_ACTIVE, judged by last_active
+    STUCK_UNCLEAN,  // pgs without PG_STATE_CLEAN, judged by last_clean
+    STUCK_STALE,  // any pg, judged by last_fresh
+    STUCK_NONE  // no criterion chosen; get_stuck_stats asserts if given this value
+  };
   
   PGMap()
     : version(0),
@@ -99,6 +106,11 @@ public:
 
   void dump_pg_stats_plain(ostream& ss,
                           const hash_map<pg_t, pg_stat_t>& pg_stats) const;
+  void get_stuck_stats(StuckPG type, utime_t cutoff,
+                      hash_map<pg_t, pg_stat_t>& stuck_pgs) const;  // adds to stuck_pgs the pgs whose type-specific timestamp precedes cutoff
+  void dump_stuck(Formatter *f, StuckPG type, utime_t cutoff) const;  // same selection, emitted through a Formatter
+  void dump_stuck_plain(ostream& ss, StuckPG type, utime_t cutoff) const;  // same selection, emitted via dump_pg_stats_plain
+
   void dump(ostream& ss) const;
 
   void state_summary(ostream& ss) const;
index 6016e9e8a4d44b34f2e9c024c93f8ad760541693..ce64eb09989341c6f91e4d0123fb73f25d5da6df 100644 (file)
@@ -981,6 +981,9 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
       jsf.flush(ds);
       rdata.append(ds);
     }
+    else if (m->cmd[1] == "dump_stuck") {
+      r = dump_stuck_pg_stats(ss, rdata, args);
+    }
     else if (m->cmd[1] == "dump_pools_json") {
       ss << "ok";
       r = 0;
@@ -1182,3 +1185,84 @@ enum health_status_t PGMonitor::get_health(std::ostream &ss) const
 
   return ret;
 }
+
+int PGMonitor::dump_stuck_pg_stats(ostream& ss,
+                                  bufferlist& rdata,
+                                  vector<const char*>& args) const
+{  // Handle "pg dump_stuck <type> [options]": parse args, then append the dump to rdata and a status to ss.
+  string format = "plain";
+  string val;
+  int threshold = 300;  // seconds; default age used to compute the cutoff below
+  int seconds;  // scratch target for -t/--threshold parsing
+  ostringstream err;
+
+  if (args.size() < 2) {  // args[1] must name the stuck type
+    ss << "Must specify inactive or unclean or stale.";
+    return -EINVAL;
+  }
+
+  PGMap::StuckPG stuck_type = PGMap::STUCK_NONE;  // stays STUCK_NONE if the type string matches nothing
+  string type = args[1];
+  if (type == "inactive")
+    stuck_type = PGMap::STUCK_INACTIVE;
+  if (type == "unclean")
+    stuck_type = PGMap::STUCK_UNCLEAN;
+  if (type == "stale")
+    stuck_type = PGMap::STUCK_STALE;
+  if (stuck_type == PGMap::STUCK_NONE) {
+    ss << "Invalid stuck type '" << type
+       << "'. Valid types are: inactive, unclean, or stale";
+    return -EINVAL;
+  }
+
+  for (std::vector<const char*>::iterator i = args.begin() + 2;  // options start after the type argument
+       i != args.end(); ) {  // no increment here; the ceph_argparse_* helpers presumably advance i -- TODO confirm
+    if (ceph_argparse_double_dash(args, i)) {
+      break;
+    } else if (ceph_argparse_witharg(args, i, &val,
+                                    "-f", "--format", (char*)NULL)) {
+      if (val != "json" && val != "plain") {
+       ss << "format must be json or plain";
+       return -EINVAL;
+      }
+      format = val;
+    } else if (ceph_argparse_withint(args, i, &seconds, &err,
+                                    "-t", "--threshold", (char*)NULL)) {
+      if (!err.str().empty()) {  // any text in err is treated as a parse failure
+       ss << err.str();
+       return -EINVAL;
+      }
+      threshold = seconds;  // NOTE(review): value is not range-checked before use
+    } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {  // NOTE(review): reached only after the type check above passes, so a bare "dump_stuck --help" gets the invalid-type error instead of usage
+      stringstream ds;
+      ds << "Usage: ceph pg dump_stuck inactive|unclean|stale [options]" << std::endl
+        << std::endl
+        << "Get stats for pgs that have not been active, clean, or refreshed in some number of seconds." << std::endl
+        << std::endl
+        << "Options: " << std::endl
+        << "  -h, --help                   display usage info" << std::endl
+        << "  -f, --format [plain|json]    output format (default: plain)" << std::endl
+        << "  -t, --threshold [seconds]    how many seconds 'stuck' is (default: 300)" << std::endl;
+      rdata.append(ds);
+      return 0;  // usage text goes to rdata; nothing is written to ss on this path
+    } else {
+      ss << "invalid argument '" << *i << "'";
+      return -EINVAL;
+    }
+  }
+
+  utime_t now(ceph_clock_now(g_ceph_context));
+  utime_t cutoff = now - utime_t(threshold, 0);  // pgs whose timestamp precedes this are reported
+
+  stringstream ds;
+  if (format == "json") {
+    JSONFormatter jsf(true);  // NOTE(review): ctor argument presumably enables pretty-printing -- confirm
+    pg_map.dump_stuck(&jsf, stuck_type, cutoff);
+    jsf.flush(ds);
+  } else {
+    pg_map.dump_stuck_plain(ds, stuck_type, cutoff);
+  }
+  rdata.append(ds);
+  ss << "ok";
+  return 0;
+}
index 42ba9c4dd882efdd827bb5eec6886dae59beceae..3dbbfc64c12f7199956adf0efa1e181af834170a 100644 (file)
@@ -112,6 +112,15 @@ private:
    */
   bool check_down_pgs();
 
+  /**
+   * Dump stats from pgs stuck in specified states.
+   * Status or error text is written to ss; the dump (or usage text) is appended to rdata.
+   * @return 0 on success, negative error code on failure
+   */
+  int dump_stuck_pg_stats(ostream& ss,
+                         bufferlist& rdata,
+                         vector<const char*>& args) const;
+
 public:
   PGMonitor(Monitor *mn, Paxos *p);
   virtual ~PGMonitor();