mon: use per-pool stats only when all OSDs are reporting
author    Sage Weil <sage@redhat.com>
Wed, 10 Jul 2019 22:12:16 +0000 (17:12 -0500)
committer Sage Weil <sage@redhat.com>
Thu, 11 Jul 2019 14:04:18 +0000 (09:04 -0500)
Previously, we would start using the per-pool stat sums as soon as *any*
OSD was reporting them.  For a legacy cluster, that meant that as soon as
one bluestore instance was updated or one new bluestore OSD was created, the
per-pool usage stats would become useless.

Instead, only use the per-pool stats once *all* OSDs are reporting the new
values.  This mostly aligns with the health warning issued when one or more
bluestore OSDs are not reporting; once they are updated, the warning goes
away.  (That warning does not factor in filestore OSDs, though; all OSDs
need to be new *and* bluestore.)
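
A minimal, self-contained C++ sketch of the gating condition (not the
actual Ceph code; OsdSum and the main() driver below are illustrative
stand-ins for the osd_sum counters consulted by
PGMapDigest::use_per_pool_stats() in src/mon/PGMap.h):

    #include <cstdint>
    #include <iostream>

    struct OsdSum {
      uint64_t num_osds;           // all OSDs known to the cluster
      uint64_t num_per_pool_osds;  // OSDs reporting the new per-pool stats
    };

    // Trust the per-pool sums only when *every* OSD reports them; a single
    // legacy (filestore or not-yet-updated bluestore) OSD keeps the cluster
    // in legacy mode.
    bool use_per_pool_stats(const OsdSum& s) {
      return s.num_osds == s.num_per_pool_osds;
    }

    int main() {
      OsdSum sum{10, 3};  // hypothetical mixed cluster: 3 of 10 OSDs updated
      std::cout << std::boolalpha
                << use_per_pool_stats(sum) << "\n";  // false -> legacy sums
      sum.num_per_pool_osds = 10;  // all OSDs now report the new values
      std::cout << use_per_pool_stats(sum) << "\n";  // true -> per-pool sums
      return 0;
    }

The same flag is propagated to clients: MgrStatMonitor copies it into the
(now v2) MGetPoolStatsReply, the Objecter hands it back to librados, and
callers pass it to pool_stat_t::get_allocated_bytes()/get_user_bytes() so
those helpers choose between the new store_stats and the legacy object sums.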

Signed-off-by: Sage Weil <sage@redhat.com>
12 files changed:
src/librados/RadosClient.cc
src/librados/RadosClient.h
src/librados/librados_c.cc
src/librados/librados_cxx.cc
src/messages/MGetPoolStatsReply.h
src/mon/MgrStatMonitor.cc
src/mon/PGMap.cc
src/mon/PGMap.h
src/osd/osd_types.h
src/osdc/Objecter.cc
src/osdc/Objecter.h
src/test/mon/PGMap.cc

index 498ca69be586bcb7352d6fcf3e15dd9d24b7d7ef..2fc2012c906455da8a4d204f6fa901e00c5ff400 100644 (file)
@@ -637,15 +637,17 @@ int librados::RadosClient::pool_list(std::list<std::pair<int64_t, string> >& v)
 }
 
 int librados::RadosClient::get_pool_stats(std::list<string>& pools,
-                                         map<string,::pool_stat_t>& result)
+                                         map<string,::pool_stat_t> *result,
+                                         bool *per_pool)
 {
   Mutex mylock("RadosClient::get_pool_stats::mylock");
   Cond cond;
   bool done;
   int ret = 0;
 
-  objecter->get_pool_stats(pools, &result, new C_SafeCond(&mylock, &cond, &done,
-                                                         &ret));
+  objecter->get_pool_stats(pools, result, per_pool,
+                          new C_SafeCond(&mylock, &cond, &done,
+                                         &ret));
 
   mylock.Lock();
   while (!done)
index ecba2e6af20cb217bb4ee7e5d8cc54006aa445b2..93f7f66e9715391d2c9d6ca5d5844cccfded0e54 100644 (file)
@@ -118,7 +118,8 @@ public:
                    bool wait_latest_map = false);
 
   int pool_list(std::list<std::pair<int64_t, string> >& ls);
-  int get_pool_stats(std::list<string>& ls, map<string,::pool_stat_t>& result);
+  int get_pool_stats(std::list<string>& ls, map<string,::pool_stat_t> *result,
+    bool *per_pool);
   int get_fs_stats(ceph_statfs& result);
   bool get_pool_is_selfmanaged_snaps_mode(const std::string& pool);
 
index 40183c28f9ca8710e05118a7f0804f9cc992bf60..ac913d1ebdff07f8f93dd497679e73754674baef 100644 (file)
@@ -1010,18 +1010,19 @@ extern "C" int _rados_ioctx_pool_stat(rados_ioctx_t io,
   ls.push_back(pool_name);
 
   map<string, ::pool_stat_t> rawresult;
-  err = io_ctx_impl->client->get_pool_stats(ls, rawresult);
+  bool per_pool = false;
+  err = io_ctx_impl->client->get_pool_stats(ls, &rawresult, &per_pool);
   if (err) {
     tracepoint(librados, rados_ioctx_pool_stat_exit, err, stats);
     return err;
   }
 
   ::pool_stat_t& r = rawresult[pool_name];
-  uint64_t allocated_bytes = r.get_allocated_bytes();
+  uint64_t allocated_bytes = r.get_allocated_bytes(per_pool);
   // FIXME: raw_used_rate is unknown hence use 1.0 here
   // meaning we keep net amount aggregated over all replicas
   // Not a big deal so far since this field isn't exposed
-  uint64_t user_bytes = r.get_user_bytes(1.0);
+  uint64_t user_bytes = r.get_user_bytes(1.0, per_pool);
 
   stats->num_kb = shift_round_up(allocated_bytes, 10);
   stats->num_bytes = allocated_bytes;
index 4dc75a8900bcf26775ad083de335bb7dbe87dbcb..7a5094f1ad753a82c172b9d85f3db1fb265f6ea5 100644 (file)
@@ -2563,18 +2563,19 @@ int librados::Rados::get_pool_stats(std::list<string>& v,
                                    stats_map& result)
 {
   map<string,::pool_stat_t> rawresult;
-  int r = client->get_pool_stats(v, rawresult);
+  bool per_pool = false;
+  int r = client->get_pool_stats(v, &rawresult, &per_pool);
   for (map<string,::pool_stat_t>::iterator p = rawresult.begin();
        p != rawresult.end();
        ++p) {
     pool_stat_t& pv = result[p->first];
     auto& pstat = p->second;
     store_statfs_t &statfs = pstat.store_stats;
-    uint64_t allocated_bytes = pstat.get_allocated_bytes();
+    uint64_t allocated_bytes = pstat.get_allocated_bytes(per_pool);
     // FIXME: raw_used_rate is unknown hence use 1.0 here
     // meaning we keep net amount aggregated over all replicas
     // Not a big deal so far since this field isn't exposed
-    uint64_t user_bytes = pstat.get_user_bytes(1.0);
+    uint64_t user_bytes = pstat.get_user_bytes(1.0, per_pool);
 
     object_stat_sum_t *sum = &p->second.stats.sum;
     pv.num_kb = shift_round_up(allocated_bytes, 10);
index 9a7ccb4cc4621f0c48098da347890ca1d10fb3f2..ff474d3d5db55083d533427591c401f82bae6bfc 100644 (file)
 #define CEPH_MGETPOOLSTATSREPLY_H
 
 class MGetPoolStatsReply : public PaxosServiceMessage {
+  static constexpr int HEAD_VERSION = 2;
+  static constexpr int COMPAT_VERSION = 1;
+
 public:
   uuid_d fsid;
   std::map<std::string,pool_stat_t> pool_stats;
+  bool per_pool = false;
 
-  MGetPoolStatsReply() : PaxosServiceMessage{MSG_GETPOOLSTATSREPLY, 0} {}
+  MGetPoolStatsReply() : PaxosServiceMessage{MSG_GETPOOLSTATSREPLY, 0,
+                                            HEAD_VERSION, COMPAT_VERSION} {}
   MGetPoolStatsReply(uuid_d& f, ceph_tid_t t, version_t v) :
-    PaxosServiceMessage{MSG_GETPOOLSTATSREPLY, v},
+    PaxosServiceMessage{MSG_GETPOOLSTATSREPLY, v,
+                       HEAD_VERSION, COMPAT_VERSION},
     fsid(f) {
     set_tid(t);
   }
@@ -34,7 +40,10 @@ private:
 public:
   std::string_view get_type_name() const override { return "getpoolstats"; }
   void print(std::ostream& out) const override {
-    out << "getpoolstatsreply(" << get_tid() << " v" << version <<  ")";
+    out << "getpoolstatsreply(" << get_tid();
+    if (per_pool)
+      out << " per_pool";
+    out << " v" << version <<  ")";
   }
 
   void encode_payload(uint64_t features) override {
@@ -42,6 +51,7 @@ public:
     paxos_encode();
     encode(fsid, payload);
     encode(pool_stats, payload, features);
+    encode(per_pool, payload);
   }
   void decode_payload() override {
     using ceph::decode;
@@ -49,6 +59,11 @@ public:
     paxos_decode(p);
     decode(fsid, p);
     decode(pool_stats, p);
+    if (header.version >= 2) {
+      decode(per_pool, p);
+    } else {
+      per_pool = false;
+    }
   }
 private:
   template<class T, typename... Args>
index f88cdf823fb13e3ef98338977f53a5a5358f422d..6cef81b47ba4272f309b5205308b87dde6b4679b 100644 (file)
@@ -246,6 +246,7 @@ bool MgrStatMonitor::preprocess_getpoolstats(MonOpRequestRef op)
   }
   epoch_t ver = get_last_committed();
   auto reply = new MGetPoolStatsReply(m->fsid, m->get_tid(), ver);
+  reply->per_pool = digest.use_per_pool_stats();
   for (const auto& pool_name : m->pools) {
     const auto pool_id = mon->osdmon()->osdmap.lookup_pg_pool_name(pool_name);
     if (pool_id == -ENOENT)
index 11bf4fda886cf3b056773337d9d5de4b866dca22..31b26fd3434cd67761e145c074ec4b1c0b8f0042 100644 (file)
@@ -807,7 +807,9 @@ void PGMapDigest::dump_pool_stats_full(
           << pool_id;
     }
     float raw_used_rate = osd_map.pool_raw_used_rate(pool_id);
-    dump_object_stat_sum(tbl, f, stat, avail, raw_used_rate, verbose, pool);
+    bool per_pool = use_per_pool_stats();
+    dump_object_stat_sum(tbl, f, stat, avail, raw_used_rate, verbose, per_pool,
+                        pool);
     if (f) {
       f->close_section();  // stats
       f->close_section();  // pool
@@ -836,6 +838,8 @@ void PGMapDigest::dump_cluster_stats(stringstream *ss,
     f->dump_int("total_used_bytes", osd_sum.statfs.get_used());
     f->dump_int("total_used_raw_bytes", osd_sum.statfs.get_used_raw());
     f->dump_float("total_used_raw_ratio", osd_sum.statfs.get_used_raw_ratio());
+    f->dump_unsigned("num_osds", osd_sum.num_osds);
+    f->dump_unsigned("num_per_pool_osds", osd_sum.num_per_pool_osds);
     f->close_section();
     f->open_object_section("stats_by_class");
     for (auto& i : osd_sum_by_class) {
@@ -886,7 +890,7 @@ void PGMapDigest::dump_cluster_stats(stringstream *ss,
 void PGMapDigest::dump_object_stat_sum(
   TextTable &tbl, ceph::Formatter *f,
   const pool_stat_t &pool_stat, uint64_t avail,
-  float raw_used_rate, bool verbose,
+  float raw_used_rate, bool verbose, bool per_pool,
   const pg_pool_t *pool)
 {
   const object_stat_sum_t &sum = pool_stat.stats.sum;
@@ -895,8 +899,8 @@ void PGMapDigest::dump_object_stat_sum(
   if (sum.num_object_copies > 0) {
     raw_used_rate *= (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies;
   }
-    
-  uint64_t used_bytes = pool_stat.get_allocated_bytes();
+
+  uint64_t used_bytes = pool_stat.get_allocated_bytes(per_pool);
 
   float used = 0.0;
   // note avail passed in is raw_avail, calc raw_used here.
@@ -908,7 +912,7 @@ void PGMapDigest::dump_object_stat_sum(
   }
   auto avail_res = raw_used_rate ? avail / raw_used_rate : 0;
   // an approximation for actually stored user data
-  auto stored_normalized = pool_stat.get_user_bytes(raw_used_rate);
+  auto stored_normalized = pool_stat.get_user_bytes(raw_used_rate, per_pool);
   if (f) {
     f->dump_int("stored", stored_normalized);
     f->dump_int("objects", sum.num_objects);
@@ -927,7 +931,7 @@ void PGMapDigest::dump_object_stat_sum(
       f->dump_int("compress_bytes_used", statfs.data_compressed_allocated);
       f->dump_int("compress_under_bytes", statfs.data_compressed_original);
       // Stored by user amplified by replication
-      f->dump_int("stored_raw", pool_stat.get_user_bytes(1.0));
+      f->dump_int("stored_raw", pool_stat.get_user_bytes(1.0, per_pool));
     }
   } else {
     tbl << stringify(byte_u_t(stored_normalized));
index 6d1af6338699d2442f9ccaf7ebc53fd5ca8c03b9..1fc92ebc7d163aaa56c9e9653f4a20a697af7107 100644 (file)
@@ -71,6 +71,10 @@ public:
 
   mempool::pgmap::map<int64_t,interval_set<snapid_t>> purged_snaps;
 
+  bool use_per_pool_stats() const {
+    return osd_sum.num_osds == osd_sum.num_per_pool_osds;
+  }
+
   // recent deltas, and summation
   /**
    * keep track of last deltas for each pool, calculated using
@@ -169,7 +173,9 @@ public:
                                   const pool_stat_t &pool_stat,
                                   uint64_t avail,
                                   float raw_used_rate,
-                                  bool verbose, const pg_pool_t *pool);
+                                  bool verbose,
+                                  bool per_pool,
+                                  const pg_pool_t *pool);
 
   size_t get_num_pg_by_osd(int osd) const {
     auto p = num_pg_by_osd.find(osd);
index e8205b30ca08a377ffdadcdc897e531cfabab69f..1455f10adcd7b883afc9fe2bdb45ad5306adf68b 100644 (file)
@@ -2509,9 +2509,9 @@ struct pool_stat_t {
   // In legacy mode used and netto values are the same. But for new per-pool
   // collection 'used' provides amount of space ALLOCATED at all related OSDs 
   // and 'netto' is amount of stored user data.
-  uint64_t get_allocated_bytes() const {
+  uint64_t get_allocated_bytes(bool per_pool) const {
     uint64_t allocated_bytes;
-    if (num_store_stats) {
+    if (per_pool) {
       allocated_bytes = store_stats.allocated;
     } else {
       // legacy mode, use numbers from 'stats'
@@ -2522,9 +2522,9 @@ struct pool_stat_t {
     allocated_bytes += stats.sum.num_omap_bytes;
     return allocated_bytes;
   }
-  uint64_t get_user_bytes(float raw_used_rate) const {
+  uint64_t get_user_bytes(float raw_used_rate, bool per_pool) const {
     uint64_t user_bytes;
-    if (num_store_stats) {
+    if (per_pool) {
       user_bytes = raw_used_rate ? store_stats.data_stored / raw_used_rate : 0;
     } else {
       // legacy mode, use numbers from 'stats'
index edca8255bbd66b06dfac5aa2b00fb80ee06cc806..e0ba1253a5ee23cb96b120676e10d1066893caec 100644 (file)
@@ -4175,6 +4175,7 @@ void Objecter::_finish_pool_op(PoolOp *op, int r)
 
 void Objecter::get_pool_stats(list<string>& pools,
                              map<string,pool_stat_t> *result,
+                             bool *per_pool,
                              Context *onfinish)
 {
   ldout(cct, 10) << "get_pool_stats " << pools << dendl;
@@ -4183,6 +4184,7 @@ void Objecter::get_pool_stats(list<string>& pools,
   op->tid = ++last_tid;
   op->pools = pools;
   op->pool_stats = result;
+  op->per_pool = per_pool;
   op->onfinish = onfinish;
   if (mon_timeout > timespan(0)) {
     op->ontimeout = timer.add_event(mon_timeout,
@@ -4229,6 +4231,7 @@ void Objecter::handle_get_pool_stats_reply(MGetPoolStatsReply *m)
     PoolStatOp *op = poolstat_ops[tid];
     ldout(cct, 10) << "have request " << tid << " at " << op << dendl;
     *op->pool_stats = m->pool_stats;
+    *op->per_pool = m->per_pool;
     if (m->version > last_seen_pgmap_version) {
       last_seen_pgmap_version = m->version;
     }
index b428ecc52176c8ab5868bd4ba2761ef8706ef59a..a0b42ae2c1593637ac672a6b734ba5df5ebe5ebd 100644 (file)
@@ -1644,6 +1644,7 @@ public:
     std::list<std::string> pools;
 
     std::map<std::string,pool_stat_t> *pool_stats;
+    bool *per_pool;
     Context *onfinish;
     uint64_t ontimeout;
 
@@ -2998,7 +2999,9 @@ private:
   void _poolstat_submit(PoolStatOp *op);
 public:
   void handle_get_pool_stats_reply(MGetPoolStatsReply *m);
-  void get_pool_stats(std::list<std::string>& pools, std::map<std::string,pool_stat_t> *result,
+  void get_pool_stats(std::list<std::string>& pools,
+                     std::map<std::string,pool_stat_t> *result,
+                     bool *per_pool,
                      Context *onfinish);
   int pool_stat_op_cancel(ceph_tid_t tid, int r);
   void _finish_pool_stat_op(PoolStatOp *op, int r);
index efdb186e7cabbc1fcc10cdeb7889c111f226d6d2..73007e0d5090cd0294ae15c09e7d10da4ac8ab3e 100644 (file)
@@ -80,7 +80,7 @@ TEST(pgmap, dump_object_stat_sum_0)
   pool.size = 2;
   pool.type = pg_pool_t::TYPE_REPLICATED;
   PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
-                                  pool.get_size(), verbose, &pool);  
+                             pool.get_size(), verbose, true, &pool);  
   float copies_rate =
     (static_cast<float>(sum.num_object_copies - sum.num_objects_degraded) /
       sum.num_object_copies) * pool.get_size();
@@ -117,7 +117,7 @@ TEST(pgmap, dump_object_stat_sum_1)
   pool.size = 2;
   pool.type = pg_pool_t::TYPE_REPLICATED;
   PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
-                                  pool.get_size(), verbose, &pool);  
+                             pool.get_size(), verbose, true, &pool);  
   unsigned col = 0;
   ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
   ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++));
@@ -148,7 +148,7 @@ TEST(pgmap, dump_object_stat_sum_2)
   pool.type = pg_pool_t::TYPE_REPLICATED;
 
   PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
-                                  pool.get_size(), verbose, &pool);  
+                             pool.get_size(), verbose, true, &pool);  
   unsigned col = 0;
   ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
   ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++));