]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd,mon: start using per-pool statistics reported from OSD.
authorIgor Fedotov <ifedotov@suse.com>
Mon, 11 Dec 2017 19:12:50 +0000 (22:12 +0300)
committerIgor Fedotov <ifedotov@suse.com>
Thu, 6 Dec 2018 15:54:21 +0000 (18:54 +0300)
Notable changes:
* pool statistics collection has been changed to track actual allocated
  store space instead of previous object logical sizes summation.
* 'ceph/rados df' reports the total space (over all replicas) allocated for a pool under the "USED" column.
* "RAW USED" column removed from 'ceph df detail' report

Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/messages/MPGStats.h
src/mgr/ClusterState.cc
src/mon/PGMap.cc
src/mon/PGMap.h
src/osd/OSD.cc
src/osd/osd_types.cc
src/osd/osd_types.h
src/test/mon/PGMap.cc

index 3988318a5a8935500fd68dfede4b13ec5168c3f2..341d42d45fd5ae73c4db038bbe13c48201c668a6 100644 (file)
 #include "messages/PaxosServiceMessage.h"
 
 class MPGStats : public MessageInstance<MPGStats, PaxosServiceMessage> {
+  static const int HEAD_VERSION = 2;
+  static const int COMPAT_VERSION = 1;
 public:
   friend factory;
 
   uuid_d fsid;
-  map<pg_t,pg_stat_t> pg_stat;
+  map<pg_t, pg_stat_t> pg_stat;
   osd_stat_t osd_stat;
+  map<int64_t, store_statfs_t> pool_stat;
   epoch_t epoch = 0;
   utime_t had_map_for;
   
-  MPGStats() : MessageInstance(MSG_PGSTATS, 0) {}
+  MPGStats() : MessageInstance(MSG_PGSTATS, 0, HEAD_VERSION, COMPAT_VERSION) {}
   MPGStats(const uuid_d& f, epoch_t e, utime_t had)
-    : MessageInstance(MSG_PGSTATS, 0),
+    : MessageInstance(MSG_PGSTATS, 0, HEAD_VERSION, COMPAT_VERSION),
       fsid(f),
       epoch(e),
       had_map_for(had)
@@ -53,6 +56,7 @@ public:
     encode(pg_stat, payload);
     encode(epoch, payload);
     encode(had_map_for, payload);
+    encode(pool_stat, payload, features);
   }
   void decode_payload() override {
     auto p = payload.cbegin();
@@ -62,6 +66,8 @@ public:
     decode(pg_stat, p);
     decode(epoch, p);
     decode(had_map_for, p);
+    if (header.version >= 2)
+      decode(pool_stat, p);
   }
 };
 
index 41afe079b3fa629a17778bb198f688d4378ed53f..7e073a58bcc077b81f0781bc6c72fcfbe4e0e691 100644 (file)
@@ -69,7 +69,6 @@ void ClusterState::ingest_pgstats(MPGStats *stats)
   std::lock_guard l(lock);
 
   const int from = stats->get_orig_source().num();
-
   pending_inc.update_stat(from, std::move(stats->osd_stat));
 
   for (auto p : stats->pg_stat) {
@@ -110,6 +109,9 @@ void ClusterState::ingest_pgstats(MPGStats *stats)
 
     pending_inc.pg_stat_updates[pgid] = pg_stats;
   }
+  for (auto& p : stats->pool_stat) {
+    pending_inc.pool_statfs_updates[std::make_pair(p.first, from)] = p.second;
+  }
 }
 
 void ClusterState::update_delta_stats()
@@ -128,7 +130,6 @@ void ClusterState::update_delta_stats()
   jf.dump_object("pending_inc", pending_inc);
   jf.flush(*_dout);
   *_dout << dendl;
-
   pg_map.apply_incremental(g_ceph_context, pending_inc);
   pending_inc = PGMap::Incremental();
 }
index 6632433bc71ca5168148cdb833ebe20e0380a01f..d6bade33066b3fcfd239d2bb960e34a494c868f0 100644 (file)
@@ -775,7 +775,9 @@ void PGMapDigest::dump_pool_stats_full(
       tbl.define_column("DIRTY", TextTable::LEFT, TextTable::RIGHT);
       tbl.define_column("READ", TextTable::LEFT, TextTable::RIGHT);
       tbl.define_column("WRITE", TextTable::LEFT, TextTable::RIGHT);
-      tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT);
+      tbl.define_column("STORED", TextTable::LEFT, TextTable::RIGHT);
+      tbl.define_column("USED COMPR", TextTable::LEFT, TextTable::RIGHT);
+      tbl.define_column("UNDER COMPR", TextTable::LEFT, TextTable::RIGHT);
     }
   }
 
@@ -785,6 +787,7 @@ void PGMapDigest::dump_pool_stats_full(
     int64_t pool_id = p->first;
     if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0))
       continue;
+
     const string& pool_name = osd_map.get_pool_name(pool_id);
     const pool_stat_t &stat = pg_pool_sum.at(pool_id);
 
@@ -793,7 +796,6 @@ void PGMapDigest::dump_pool_stats_full(
                                          pool->get_type(),
                                          pool->get_size());
     int64_t avail;
-    float raw_used_rate;
     if (avail_by_rule.count(ruleno) == 0) {
       // FIXME: we don't guarantee avail_space_by_rule is up-to-date before this function is invoked
       avail = get_rule_avail(ruleno);
@@ -804,8 +806,6 @@ void PGMapDigest::dump_pool_stats_full(
       avail = avail_by_rule[ruleno];
     }
 
-    raw_used_rate = ::pool_raw_used_rate(osd_map, pool_id);
-
     if (f) {
       f->open_object_section("pool");
       f->dump_string("name", pool_name);
@@ -827,7 +827,8 @@ void PGMapDigest::dump_pool_stats_full(
       }
 
     }
-    dump_object_stat_sum(tbl, f, stat.stats.sum, avail, raw_used_rate, verbose, pool);
+    float raw_used_rate = ::pool_raw_used_rate(osd_map, pool_id);
+    dump_object_stat_sum(tbl, f, stat, avail, raw_used_rate, verbose, pool);
     if (f)
       f->close_section();  // stats
     else
@@ -890,28 +891,34 @@ void PGMapDigest::dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) co
 
 void PGMapDigest::dump_object_stat_sum(
   TextTable &tbl, Formatter *f,
-  const object_stat_sum_t &sum, uint64_t avail,
+  const pool_stat_t &pool_stat, uint64_t avail,
   float raw_used_rate, bool verbose,
   const pg_pool_t *pool)
 {
-  float curr_object_copies_rate = 0.0;
-  if (sum.num_object_copies > 0)
-    curr_object_copies_rate = (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies;
+  const object_stat_sum_t &sum = pool_stat.stats.sum;
+  const store_statfs_t& statfs = pool_stat.store_stats;
 
+  if (sum.num_object_copies > 0) {
+    raw_used_rate *= (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies;
+  }
+
   float used = 0.0;
   // note avail passed in is raw_avail, calc raw_used here.
   if (avail) {
-    used = sum.num_bytes * raw_used_rate * curr_object_copies_rate;
+    used = statfs.allocated;
     used /= used + avail;
-  } else if (sum.num_bytes) {
+  } else if (statfs.allocated) {
     used = 1.0;
   }
-
+  auto avail_res = raw_used_rate ? avail / raw_used_rate : 0;
+  // an approximation for actually stored user data
+  auto stored_normalized =
+    raw_used_rate ? statfs.stored / raw_used_rate : 0;
   if (f) {
-    f->dump_int("kb_used", shift_round_up(sum.num_bytes, 10));
-    f->dump_int("bytes_used", sum.num_bytes);
+    f->dump_int("kb_used", shift_round_up(statfs.allocated, 10));
+    f->dump_int("bytes_used", statfs.allocated);
     f->dump_float("percent_used", used);
-    f->dump_unsigned("max_avail", avail / raw_used_rate);
+    f->dump_unsigned("max_avail", avail_res);
     f->dump_int("objects", sum.num_objects);
     if (verbose) {
       f->dump_int("quota_objects", pool->quota_max_objects);
@@ -921,18 +928,25 @@ void PGMapDigest::dump_object_stat_sum(
       f->dump_int("rd_bytes", sum.num_rd_kb * 1024ull);
       f->dump_int("wr", sum.num_wr);
       f->dump_int("wr_bytes", sum.num_wr_kb * 1024ull);
-      f->dump_int("raw_bytes_used", sum.num_bytes * raw_used_rate * curr_object_copies_rate);
+      f->dump_int("stored", stored_normalized);
+      f->dump_int("compress_bytes_used", statfs.compressed_allocated);
+      f->dump_int("compress_under_bytes", statfs.compressed_original);
+      // Stored by user amplified by replication
+      f->dump_int("stored_raw", statfs.stored);
     }
   } else {
-    tbl << stringify(byte_u_t(sum.num_bytes));
+    tbl << stringify(byte_u_t(statfs.allocated));
     tbl << percentify(used*100);
-    tbl << byte_u_t(avail / raw_used_rate);
+    tbl << byte_u_t(avail_res);
     tbl << sum.num_objects;
     if (verbose) {
       tbl << stringify(si_u_t(sum.num_objects_dirty))
           << stringify(byte_u_t(sum.num_rd))
           << stringify(byte_u_t(sum.num_wr))
-          << stringify(byte_u_t(sum.num_bytes * raw_used_rate * curr_object_copies_rate));
+         << stringify(byte_u_t(stored_normalized))
+         << stringify(byte_u_t(statfs.compressed_allocated))
+         << stringify(byte_u_t(statfs.compressed_original))
+         ;
     }
   }
 }
@@ -1040,6 +1054,14 @@ void PGMap::Incremental::dump(Formatter *f) const
     f->close_section();
   }
   f->close_section();
+  f->open_array_section("pool_statfs_updates");
+  for (auto p = pool_statfs_updates.begin(); p != pool_statfs_updates.end(); ++p) {
+    f->open_object_section("pool_statfs");
+    f->dump_stream("poolid/osd") << p->first;
+    p->second.dump(f);
+    f->close_section();
+  }
+  f->close_section();
 
   f->open_array_section("osd_stat_removals");
   for (auto p = osd_stat_rm.begin(); p != osd_stat_rm.end(); ++p)
@@ -1070,9 +1092,9 @@ void PGMap::Incremental::generate_test_instances(list<PGMap::Incremental*>& o)
   o.back()->osd_stat_updates[6] = osd_stat_t();
   o.back()->pg_remove.insert(pg_t(1,2));
   o.back()->osd_stat_rm.insert(5);
+  o.back()->pool_statfs_updates[std::make_pair(1234,4)] = store_statfs_t();
 }
 
-
 // --
 
 void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
@@ -1080,27 +1102,48 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
   ceph_assert(inc.version == version+1);
   version++;
 
-  pool_stat_t pg_sum_old = pg_sum;
-  mempool::pgmap::unordered_map<uint64_t, pool_stat_t> pg_pool_sum_old;
+  mempool::pgmap::unordered_map<int32_t, pool_stat_t> pg_pool_sum_old;
+  pg_pool_sum_old = pg_pool_sum;
 
   for (auto p = inc.pg_stat_updates.begin();
        p != inc.pg_stat_updates.end();
        ++p) {
     const pg_t &update_pg(p->first);
+    auto update_pool = update_pg.pool();
     const pg_stat_t &update_stat(p->second);
 
-    if (pg_pool_sum_old.count(update_pg.pool()) == 0)
-      pg_pool_sum_old[update_pg.pool()] = pg_pool_sum[update_pg.pool()];
-
-    auto t = pg_stat.find(update_pg);
-    if (t == pg_stat.end()) {
+    auto pg_stat_iter = pg_stat.find(update_pg);
+    pool_stat_t &pool_sum_ref = pg_pool_sum[update_pool];
+    if (pg_stat_iter == pg_stat.end()) {
       pg_stat.insert(make_pair(update_pg, update_stat));
     } else {
-      stat_pg_sub(update_pg, t->second);
-      t->second = update_stat;
+      stat_pg_sub(update_pg, pg_stat_iter->second);
+      pool_sum_ref.sub(pg_stat_iter->second);
+      pg_stat_iter->second = update_stat;
     }
     stat_pg_add(update_pg, update_stat);
+    pool_sum_ref.add(update_stat);
   }
+
+  for (auto p = inc.pool_statfs_updates.begin();
+       p != inc.pool_statfs_updates.end();
+       ++p) {
+    auto update_pool = p->first.first;
+    auto update_osd =  p->first.second;
+    auto& statfs_inc = p->second;
+
+    auto pool_statfs_iter =
+      pool_statfs.find(std::make_pair(update_pool, update_osd));
+    pool_stat_t &pool_sum_ref = pg_pool_sum[update_pool];
+    if (pool_statfs_iter == pool_statfs.end()) {
+      pool_statfs.emplace(std::make_pair(update_pool, update_osd), statfs_inc);
+    } else {
+      pool_sum_ref.sub(pool_statfs_iter->second);
+      pool_statfs_iter->second = statfs_inc;
+    }
+    pool_sum_ref.add(statfs_inc);
+  }
+
   for (auto p = inc.get_osd_stat_updates().begin();
        p != inc.get_osd_stat_updates().end();
        ++p) {
@@ -1122,12 +1165,13 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
        ++p) {
     const pg_t &removed_pg(*p);
     auto s = pg_stat.find(removed_pg);
+    bool pool_erased = false;
     if (s != pg_stat.end()) {
-      stat_pg_sub(removed_pg, s->second);
+      pool_erased = stat_pg_sub(removed_pg, s->second);
       pg_stat.erase(s);
-    }
-    if (removed_pg.ps() == 0) {
-      deleted_pools.insert(removed_pg.pool());
+      if (pool_erased) {
+        deleted_pools.insert(removed_pg.pool());
+      }
     }
   }
 
@@ -1139,8 +1183,17 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
       stat_osd_sub(t->first, t->second);
       osd_stat.erase(t);
     }
+    for (auto i = pool_statfs.begin();  i != pool_statfs.end();) {
+      if (i->first.second == *p) {
+       pg_pool_sum[i->first.first].sub(i->second);
+       i = pool_statfs.erase(i);
+      } else {
+       ++i;
+      }
+    }
   }
 
+  pool_stat_t pg_sum_old = pg_sum;
   // skip calculating delta while sum was not synchronized
   if (!stamp.is_zero() && !pg_sum_old.stats.sum.is_zero()) {
     utime_t delta_t;
@@ -1194,7 +1245,15 @@ void PGMap::calc_stats()
   for (auto p = pg_stat.begin();
        p != pg_stat.end();
        ++p) {
-    stat_pg_add(p->first, p->second);
+    auto pg = p->first;
+    stat_pg_add(pg, p->second);
+    pg_pool_sum[pg.pool()].add(p->second);
+  }
+  for (auto p = pool_statfs.begin();
+       p != pool_statfs.end();
+       ++p) {
+    auto pool = p->first.first;
+    pg_pool_sum[pool].add(p->second);
   }
   for (auto p = osd_stat.begin();
        p != osd_stat.end();
@@ -1205,13 +1264,13 @@ void PGMap::calc_stats()
 void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
                         bool sameosds)
 {
-  pg_pool_sum[pgid.pool()].add(s);
+  auto pool = pgid.pool();
   pg_sum.add(s);
 
   num_pg++;
   num_pg_by_state[s.state]++;
   num_pg_by_pool_state[pgid.pool()][s.state]++;
-  num_pg_by_pool[pgid.pool()]++;
+  num_pg_by_pool[pool]++;
 
   if ((s.state & PG_STATE_CREATING) &&
       s.parent_split_bits == 0) {
@@ -1251,11 +1310,10 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
   }
 }
 
-void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
+bool PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
                         bool sameosds)
 {
-  pool_stat_t& ps = pg_pool_sum[pgid.pool()];
-  ps.sub(s);
+  bool pool_erased = false;
   pg_sum.sub(s);
 
   num_pg--;
@@ -1268,9 +1326,7 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
   }
   end = --num_pg_by_pool[pgid.pool()];
   if (end == 0) {
-    num_pg_by_pool_state.erase(pgid.pool());
-    num_pg_by_pool.erase(pgid.pool());
-    pg_pool_sum.erase(pgid.pool());
+    pool_erased = true;
   }
 
   if ((s.state & PG_STATE_CREATING) &&
@@ -1294,7 +1350,7 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
   }
 
   if (sameosds)
-    return;
+    return pool_erased;
 
   for (auto p = s.blocked_by.begin();
        p != s.blocked_by.end();
@@ -1330,6 +1386,7 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
     if (it != num_pg_by_osd.end() && it->second.primary > 0)
       it->second.primary--;
   }
+  return pool_erased;
 }
 
 void PGMap::calc_purged_snaps()
@@ -1382,25 +1439,27 @@ void PGMap::encode_digest(const OSDMap& osdmap,
 
 void PGMap::encode(bufferlist &bl, uint64_t features) const
 {
-  ENCODE_START(7, 7, bl);
+  ENCODE_START(8, 8, bl);
   encode(version, bl);
   encode(pg_stat, bl);
   encode(osd_stat, bl, features);
   encode(last_osdmap_epoch, bl);
   encode(last_pg_scan, bl);
   encode(stamp, bl);
+  encode(pool_statfs, bl, features);
   ENCODE_FINISH(bl);
 }
 
 void PGMap::decode(bufferlist::const_iterator &bl)
 {
-  DECODE_START(7, bl);
+  DECODE_START(8, bl);
   decode(version, bl);
   decode(pg_stat, bl);
   decode(osd_stat, bl);
   decode(last_osdmap_epoch, bl);
   decode(last_pg_scan, bl);
   decode(stamp, bl);
+  decode(pool_statfs, bl);
   DECODE_FINISH(bl);
 
   calc_stats();
@@ -2044,11 +2103,11 @@ void PGMap::update_one_pool_delta(
  */
 void PGMap::update_pool_deltas(
   CephContext *cct, const utime_t ts,
-  const mempool::pgmap::unordered_map<uint64_t,pool_stat_t>& pg_pool_sum_old)
+  const mempool::pgmap::unordered_map<int32_t,pool_stat_t>& pg_pool_sum_old)
 {
   for (auto it = pg_pool_sum_old.begin();
        it != pg_pool_sum_old.end(); ++it) {
-    update_one_pool_delta(cct, ts, it->first, it->second);
+    update_one_pool_delta(cct, ts, (uint64_t)it->first, it->second); //FIXME: make pool type consistent!!!
   }
 }
 
index 69ec40e5b71d2ba5f3b7a9f48ad77cfc54a5e2ef..aaaef6b91e78d0a52310e229a0ff63d0d5d96802 100644 (file)
@@ -165,7 +165,7 @@ public:
                                    Formatter *f, bool verbose) const;
   void dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const;
   static void dump_object_stat_sum(TextTable &tbl, Formatter *f,
-                           const object_stat_sum_t &sum,
+                           const pool_stat_t &pool_stat,
                            uint64_t avail,
                            float raw_used_rate,
                            bool verbose, const pg_pool_t *pool);
@@ -230,6 +230,13 @@ public:
   mempool::pgmap::unordered_map<int32_t,osd_stat_t> osd_stat;
   mempool::pgmap::unordered_map<pg_t,pg_stat_t> pg_stat;
 
+  typedef mempool::pgmap::map<
+    std::pair<int64_t, int>,  // <pool, osd>
+    store_statfs_t>
+      per_osd_pool_statfs_t;
+
+  per_osd_pool_statfs_t pool_statfs;
+
   class Incremental {
   public:
     MEMPOOL_CLASS_HELPERS();
@@ -239,6 +246,7 @@ public:
     epoch_t pg_scan;  // osdmap epoch
     mempool::pgmap::set<pg_t> pg_remove;
     utime_t stamp;
+    per_osd_pool_statfs_t pool_statfs_updates;
 
   private:
     mempool::pgmap::map<int32_t,osd_stat_t> osd_stat_updates;
@@ -293,11 +301,20 @@ public:
   void update_pool_deltas(
     CephContext *cct,
     const utime_t ts,
-    const mempool::pgmap::unordered_map<uint64_t, pool_stat_t>& pg_pool_sum_old);
+    const mempool::pgmap::unordered_map<int32_t, pool_stat_t>& pg_pool_sum_old);
   void clear_delta();
 
   void deleted_pool(int64_t pool) {
+    for (auto i = pool_statfs.begin();  i != pool_statfs.end();) {
+      if (i->first.first == pool) {
+       i = pool_statfs.erase(i);
+      } else {
+        ++i;
+      }
+    }
+
     pg_pool_sum.erase(pool);
+    num_pg_by_pool_state.erase(pool);
     num_pg_by_pool.erase(pool);
     per_pool_sum_deltas.erase(pool);
     per_pool_sum_deltas_stamps.erase(pool);
@@ -386,7 +403,7 @@ public:
   void calc_stats();
   void stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
                   bool sameosds=false);
-  void stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
+  bool stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
                   bool sameosds=false);
   void calc_purged_snaps();
   void stat_osd_add(int osd, const osd_stat_t &s);
index c30a2ba982746c1bf9b884abb1c6c61697bb48ae..4ac1b389052052183c23c5419b50eff106d7da51 100644 (file)
@@ -7299,9 +7299,13 @@ MPGStats* OSD::collect_pg_stats()
   std::lock_guard lec{min_last_epoch_clean_lock};
   min_last_epoch_clean = osdmap->get_epoch();
   min_last_epoch_clean_pgs.clear();
+
+  std::set<int64_t> pool_set;
   vector<PGRef> pgs;
   _get_pgs(&pgs);
   for (auto& pg : pgs) {
+    auto pool = pg->pg_id.pgid.pool();
+    pool_set.emplace((int64_t)pool);
     if (!pg->is_primary()) {
       continue;
     }
@@ -7311,6 +7315,16 @@ MPGStats* OSD::collect_pg_stats()
        min_last_epoch_clean_pgs.push_back(pg->pg_id.pgid);
       });
   }
+  store_statfs_t st;
+  for (auto p : pool_set) {
+    int r = store->pool_statfs(p, &st);
+    if (r == -ENOTSUP) {
+      break;
+    } else {
+      ceph_assert(r >= 0);
+      m->pool_stat[p] = st;
+    }
+  }
 
   return m;
 }
index 7240e6e66d3bb01f472e53bf9b24bb9130e1dda4..2be1b862aca955b8a97e0e2947d7f1627625e429 100644 (file)
@@ -2744,15 +2744,84 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r)
     l.snaptrimq_len == r.snaptrimq_len;
 }
 
+// -- store_statfs_t --
+
+bool store_statfs_t::operator==(const store_statfs_t& other) const
+{
+  return total == other.total
+    && available == other.available
+    && allocated == other.allocated
+    && internally_reserved == other.internally_reserved
+    && data_stored == other.data_stored
+    && data_compressed == other.data_compressed
+    && data_compressed_allocated == other.data_compressed_allocated
+    && data_compressed_original == other.data_compressed_original
+    && omap_allocated == other.omap_allocated
+    && internal_metadata == other.internal_metadata;
+}
+
+void store_statfs_t::dump(Formatter *f) const
+{
+  f->dump_int("total", total);
+  f->dump_int("available", available);
+  f->dump_int("internally_reserved", internally_reserved);
+  f->dump_int("allocated", allocated);
+  f->dump_int("data_stored", data_stored);
+  f->dump_int("data_compressed", data_compressed);
+  f->dump_int("data_compressed_allocated", data_compressed_allocated);
+  f->dump_int("data_compressed_original", data_compressed_original);
+  f->dump_int("omap_allocated", omap_allocated);
+  f->dump_int("internal_metadata", internal_metadata);
+}
+
+ostream& operator<<(ostream& out, const store_statfs_t &s)
+{
+  out << std::hex
+      << "store_statfs(0x" << s.available
+      << "/0x"  << s.internally_reserved
+      << "/0x"  << s.total
+      << ", data 0x" << s.data_stored
+      << "/0x"  << s.allocated
+      << ", compress 0x" << s.data_compressed
+      << "/0x"  << s.data_compressed_allocated
+      << "/0x"  << s.data_compressed_original
+      << ", omap 0x" << s.omap_allocated
+      << ", meta 0x" << s.internal_metadata
+      << std::dec
+      << ")";
+  return out;
+}
+
+void store_statfs_t::generate_test_instances(list<store_statfs_t*>& o)
+{
+  store_statfs_t a;
+  o.push_back(new store_statfs_t(a));
+  a.total = 234;
+  a.available = 123;
+  a.internally_reserved = 33;
+  a.allocated = 32;
+  a.data_stored = 44;
+  a.data_compressed = 21;
+  a.data_compressed_allocated = 12;
+  a.data_compressed_original = 13;
+  a.omap_allocated = 14;
+  a.internal_metadata = 15;
+  o.push_back(new store_statfs_t(a));
+}
+
 // -- pool_stat_t --
 
 void pool_stat_t::dump(Formatter *f) const
 {
   stats.dump(f);
+  f->open_object_section("store_stats");
+  store_stats.dump(f);
+  f->close_section();
   f->dump_int("log_size", log_size);
   f->dump_int("ondisk_log_size", ondisk_log_size);
   f->dump_int("up", up);
   f->dump_int("acting", acting);
+  f->dump_int("num_store_stats", num_store_stats);
 }
 
 void pool_stat_t::encode(bufferlist &bl, uint64_t features) const
@@ -2767,18 +2836,20 @@ void pool_stat_t::encode(bufferlist &bl, uint64_t features) const
     return;
   }
 
-  ENCODE_START(6, 5, bl);
+  ENCODE_START(7, 5, bl);
   encode(stats, bl);
   encode(log_size, bl);
   encode(ondisk_log_size, bl);
   encode(up, bl);
   encode(acting, bl);
+  encode(store_stats, bl);
+  encode(num_store_stats, bl);
   ENCODE_FINISH(bl);
 }
 
 void pool_stat_t::decode(bufferlist::const_iterator &bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
   if (struct_v >= 4) {
     decode(stats, bl);
     decode(log_size, bl);
@@ -2790,6 +2861,14 @@ void pool_stat_t::decode(bufferlist::const_iterator &bl)
       up = 0;
       acting = 0;
     }
+    if (struct_v >= 7) {
+      decode(store_stats, bl);
+      decode(num_store_stats, bl);
+    } else {
+      store_stats.reset();
+      num_store_stats = 0;
+    }
+
   } else {
     decode(stats.sum.num_bytes, bl);
     uint64_t num_kb;
@@ -2821,11 +2900,15 @@ void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
 
   list<object_stat_collection_t*> l;
   object_stat_collection_t::generate_test_instances(l);
+  list<store_statfs_t*> ll;
+  store_statfs_t::generate_test_instances(ll);
   a.stats = *l.back();
+  a.store_stats = *ll.back();
   a.log_size = 123;
   a.ondisk_log_size = 456;
   a.acting = 3;
   a.up = 4;
+  a.num_store_stats = 1;
   o.push_back(new pool_stat_t(a));
 }
 
@@ -6136,49 +6219,6 @@ void OSDOp::merge_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& out)
   }
 }
 
-bool store_statfs_t::operator==(const store_statfs_t& other) const
-{
-  return total == other.total
-    && available == other.available
-    && allocated == other.allocated
-    && data_stored == other.data_stored
-    && data_compressed == other.data_compressed
-    && data_compressed_allocated == other.data_compressed_allocated
-    && data_compressed_original == other.data_compressed_original
-    && omap_allocated == other.omap_allocated
-    && internal_metadata == other.internal_metadata;
-}
-
-void store_statfs_t::dump(Formatter *f) const
-{
-  f->dump_int("total", total);
-  f->dump_int("available", available);
-  f->dump_int("allocated", allocated);
-  f->dump_int("data_stored", data_stored);
-  f->dump_int("data_compressed", data_compressed);
-  f->dump_int("data_compressed_allocated", data_compressed_allocated);
-  f->dump_int("data_compressed_original", data_compressed_original);
-  f->dump_int("omap_allocated", omap_allocated);
-  f->dump_int("internal_metadata", internal_metadata);
-}
-
-ostream& operator<<(ostream& out, const store_statfs_t &s)
-{
-  out << std::hex
-      << "store_statfs(0x" << s.available
-      << "/0x"  << s.total
-      << ", data 0x" << s.data_stored
-      << "/0x"  << s.allocated
-      << ", compress 0x" << s.data_compressed
-      << "/0x"  << s.data_compressed_allocated
-      << "/0x"  << s.data_compressed_original
-      << ", omap 0x" << s.omap_allocated
-      << ", meta 0x" << s.internal_metadata
-      << std::dec
-      << ")";
-  return out;
-}
-
 void OSDOp::clear_data(vector<OSDOp>& ops)
 {
   for (unsigned i = 0; i < ops.size(); i++) {
index f69a71f4b86d1d9c3f155f0aa788d1a9a03288f4..17b92812ebd5b50b7b76dc258e5f95243bba8a4d 100644 (file)
@@ -2186,21 +2186,112 @@ WRITE_CLASS_ENCODER(pg_stat_t)
 
 bool operator==(const pg_stat_t& l, const pg_stat_t& r);
 
+/** store_statfs_t
+ * ObjectStore full statfs information
+ */
+struct store_statfs_t
+{
+  uint64_t total = 0;                  ///< Total bytes
+  uint64_t available = 0;              ///< Free bytes available
+  uint64_t internally_reserved = 0;    ///< Bytes reserved for internal purposes
+
+  int64_t allocated = 0;               ///< Bytes allocated by the store
+
+  int64_t data_stored = 0;                ///< Bytes actually stored by the user
+  int64_t data_compressed = 0;            ///< Bytes stored after compression
+  int64_t data_compressed_allocated = 0;  ///< Bytes allocated for compressed data
+  int64_t data_compressed_original = 0;   ///< Bytes that were compressed
+
+  int64_t omap_allocated = 0;         ///< approx usage of omap data
+  int64_t internal_metadata = 0;      ///< approx usage of internal metadata
+
+  void reset() {
+    *this = store_statfs_t();
+  }
+  void floor(int64_t f) {
+#define FLOOR(x) if (int64_t(x) < f) x = f
+    FLOOR(total);
+    FLOOR(available);
+    FLOOR(internally_reserved);
+    FLOOR(allocated);
+    FLOOR(data_stored);
+    FLOOR(data_compressed);
+    FLOOR(data_compressed_allocated);
+    FLOOR(data_compressed_original);
+
+    FLOOR(omap_allocated);
+    FLOOR(internal_metadata);
+#undef FLOOR
+  }
+
+  bool operator ==(const store_statfs_t& other) const;
+  bool is_zero() const {
+    return *this == store_statfs_t();
+  }
+  void add(const store_statfs_t& o) {
+    total += o.total;
+    available += o.available;
+    internally_reserved += o.internally_reserved;
+    allocated += o.allocated;
+    data_stored += o.data_stored;
+    data_compressed += o.data_compressed;
+    data_compressed_allocated += o.data_compressed_allocated;
+    data_compressed_original += o.data_compressed_original;
+    omap_allocated += o.omap_allocated;
+    internal_metadata += o.internal_metadata;
+  }
+  void sub(const store_statfs_t& o) {
+    total -= o.total;
+    available -= o.available;
+    internally_reserved -= o.internally_reserved;
+    allocated -= o.allocated;
+    data_stored -= o.data_stored;
+    data_compressed -= o.data_compressed;
+    data_compressed_allocated -= o.data_compressed_allocated;
+    data_compressed_original -= o.data_compressed_original;
+    omap_allocated -= o.omap_allocated;
+    internal_metadata -= o.internal_metadata;
+  }
+  void dump(Formatter *f) const;
+  DENC(store_statfs_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.total, p);
+    denc(v.available, p);
+    denc(v.internally_reserved, p);
+    denc(v.allocated, p);
+    denc(v.data_stored, p);
+    denc(v.data_compressed, p);
+    denc(v.data_compressed_allocated, p);
+    denc(v.data_compressed_original, p);
+    denc(v.omap_allocated, p);
+    denc(v.internal_metadata, p);
+    DENC_FINISH(p);
+  }
+  static void generate_test_instances(list<store_statfs_t*>& o);
+};
+WRITE_CLASS_DENC(store_statfs_t)
+
+ostream &operator<<(ostream &lhs, const store_statfs_t &rhs);
+
 /*
  * summation over an entire pool
  */
 struct pool_stat_t {
   object_stat_collection_t stats;
+  store_statfs_t store_stats;
   int64_t log_size;
   int64_t ondisk_log_size;    // >= active_log_size
   int32_t up;       ///< number of up replicas or shards
   int32_t acting;   ///< number of acting replicas or shards
+  int32_t num_store_stats; ///< amount of store_stats accumulated
 
-  pool_stat_t() : log_size(0), ondisk_log_size(0), up(0), acting(0)
+  pool_stat_t() : log_size(0), ondisk_log_size(0), up(0), acting(0),
+    num_store_stats(0)
   { }
 
   void floor(int64_t f) {
     stats.floor(f);
+    store_stats.floor(f);
     if (log_size < f)
       log_size = f;
     if (ondisk_log_size < f)
@@ -2209,6 +2300,17 @@ struct pool_stat_t {
       up = f;
     if (acting < f)
       acting = f;
+    if (num_store_stats < f)
+      num_store_stats = f;
+  }
+
+  void add(const store_statfs_t& o) {
+    store_stats.add(o);
+    ++num_store_stats;
+  }
+  void sub(const store_statfs_t& o) {
+    store_stats.sub(o);
+    --num_store_stats;
   }
 
   void add(const pg_stat_t& o) {
@@ -2228,10 +2330,39 @@ struct pool_stat_t {
 
   bool is_zero() const {
     return (stats.is_zero() &&
+            store_stats.is_zero() &&
            log_size == 0 &&
            ondisk_log_size == 0 &&
            up == 0 &&
-           acting == 0);
+           acting == 0 &&
+           num_store_stats == 0);
+  }
+
+  // Helper accessors retrieving allocated (used) and user (net) bytes,
+  // depending on the collection method: the new per-pool objectstore
+  // report or the legacy PG summation at the OSD.
+  // In legacy mode both values are the same. For the new per-pool
+  // collection 'used' is the space ALLOCATED across all related OSDs,
+  // while 'net' is the amount of user data actually stored.
+  uint64_t get_allocated_bytes() const {
+    uint64_t allocated_bytes;
+    if (num_store_stats) {
+      allocated_bytes = store_stats.allocated;
+    } else {
+      // legacy mode, use numbers from 'stats'
+      allocated_bytes = stats.sum.num_bytes;
+    }
+    return allocated_bytes;
+  }
+  uint64_t get_user_bytes(float raw_used_rate) const {
+    uint64_t user_bytes;
+    if (num_store_stats) {
+      user_bytes = raw_used_rate ? store_stats.data_stored / raw_used_rate : 0;
+    } else {
+      // legacy mode, use numbers from 'stats'
+       user_bytes = stats.sum.num_bytes;
+    }
+    return user_bytes;
   }
 
   void dump(Formatter *f) const;
@@ -5508,33 +5639,6 @@ struct PromoteCounter {
   }
 };
 
-/** store_statfs_t
- * ObjectStore full statfs information
- */
-struct store_statfs_t
-{
-  uint64_t total = 0;                  ///< Total bytes
-  uint64_t available = 0;              ///< Free bytes available
-
-  int64_t allocated = 0;               ///< Bytes allocated by the store
-
-  int64_t data_stored = 0;                ///< Bytes actually stored by the user
-  int64_t data_compressed = 0;            ///< Bytes stored after compression
-  int64_t data_compressed_allocated = 0;  ///< Bytes allocated for compressed data
-  int64_t data_compressed_original = 0;   ///< Bytes that were compressed
-
-  int64_t omap_allocated = 0;         ///< approx usage of omap data
-  int64_t internal_metadata = 0;      ///< approx usage of internal metadata
-
-  void reset() {
-    *this = store_statfs_t();
-  }
-  bool operator ==(const store_statfs_t& other) const;
-  void dump(Formatter *f) const;
-};
-ostream &operator<<(ostream &lhs, const store_statfs_t &rhs);
-
-
 struct pool_pg_num_history_t {
   /// last epoch updated
   epoch_t epoch = 0;
index 2d6fd4522f170829215f71c01eb756c2563e4c64..190478ec8abd1b45352c5012c4d9f16151836af9 100644 (file)
@@ -25,7 +25,7 @@ namespace {
         define_column("", TextTable::LEFT, TextTable::LEFT);
       }
       if (verbose) {
-        for (int i = 0; i < 4; i++) {
+        for (int i = 0; i < 6; i++) {
           define_column("", TextTable::LEFT, TextTable::LEFT);
         }
       }
@@ -54,7 +54,8 @@ TEST(pgmap, dump_object_stat_sum_0)
 {
   bool verbose = true;
   CheckTextTable tbl(verbose);
-  object_stat_sum_t sum;
+  pool_stat_t pool_stat;
+  object_stat_sum_t& sum = pool_stat.stats.sum;
   sum.num_bytes = 42 * 1024 * 1024;
   sum.num_objects = 42;
   sum.num_objects_degraded = 13; // there are 13 missings + not_yet_backfilled
@@ -63,8 +64,13 @@ TEST(pgmap, dump_object_stat_sum_0)
   sum.num_rd_kb = 123;
   sum.num_wr = 101;
   sum.num_wr_kb = 321;    
+  store_statfs_t &statfs = pool_stat.store_stats;
+  statfs.data_stored = 42 * 1024 * 1024 - 5;
+  statfs.allocated = 42 * 1024 * 1024 * 2;
+  statfs.data_compressed_allocated = 4334;
+  statfs.data_compressed_original = 1213;
 
-  sum.calc_copies(3);           // assuming we have 3 copies for each obj
+  sum.calc_copies(3); // assuming we have 3 copies for each obj
   // nominal amount of space available for new objects in this pool
   uint64_t avail = 2016 * 1024 * 1024;
   pg_pool_t pool;
@@ -72,25 +78,26 @@ TEST(pgmap, dump_object_stat_sum_0)
   pool.quota_max_bytes = 2000 * 1024 * 1024;
   pool.size = 2;
   pool.type = pg_pool_t::TYPE_REPLICATED;
-  PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail,
+  PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
                                   pool.get_size(), verbose, &pool);  
-  ASSERT_EQ(stringify(byte_u_t(sum.num_bytes)), tbl.get(0, 0));
   float copies_rate =
     (static_cast<float>(sum.num_object_copies - sum.num_objects_degraded) /
-     sum.num_object_copies);
-  float used_bytes = sum.num_bytes * copies_rate * pool.get_size();
-  float used_percent = used_bytes / (used_bytes + avail) * 100;
+      sum.num_object_copies) * pool.get_size();
+  float used_percent = (float)statfs.allocated /
+    (statfs.allocated + avail) * 100;
+  uint64_t stored = statfs.data_stored / copies_rate;
+
   unsigned col = 0;
-  ASSERT_EQ(stringify(byte_u_t(sum.num_bytes)), tbl.get(0, col++));
+  ASSERT_EQ(stringify(byte_u_t(statfs.allocated)), tbl.get(0, col++));
   ASSERT_EQ(percentify(used_percent), tbl.get(0, col++));
-  ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++));
+  ASSERT_EQ(stringify(byte_u_t(avail/copies_rate)), tbl.get(0, col++));
   ASSERT_EQ(stringify(sum.num_objects), tbl.get(0, col++));
   ASSERT_EQ(stringify(si_u_t(sum.num_objects_dirty)), tbl.get(0, col++));
   ASSERT_EQ(stringify(byte_u_t(sum.num_rd)), tbl.get(0, col++));
   ASSERT_EQ(stringify(byte_u_t(sum.num_wr)), tbl.get(0, col++));
-  // we can use pool.size for raw_used_rate if it is a replica pool
-  uint64_t raw_bytes_used = sum.num_bytes * pool.get_size() * copies_rate;
-  ASSERT_EQ(stringify(byte_u_t(raw_bytes_used)), tbl.get(0, col++));
+  ASSERT_EQ(stringify(byte_u_t(stored)), tbl.get(0, col++));
+  ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_allocated)), tbl.get(0, col++));
+  ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_original)), tbl.get(0, col++));
 }
 
 // with table, without formatter, verbose = true, empty, avail > 0
@@ -98,7 +105,8 @@ TEST(pgmap, dump_object_stat_sum_1)
 {
   bool verbose = true;
   CheckTextTable tbl(verbose);
-  object_stat_sum_t sum;        // zero by default
+  pool_stat_t pool_stat;
+  object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default
   ASSERT_TRUE(sum.is_zero());
   // nominal amount of space available for new objects in this pool
   uint64_t avail = 2016 * 1024 * 1024;
@@ -107,7 +115,7 @@ TEST(pgmap, dump_object_stat_sum_1)
   pool.quota_max_bytes = 2000 * 1024 * 1024;
   pool.size = 2;
   pool.type = pg_pool_t::TYPE_REPLICATED;
-  PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail,
+  PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
                                   pool.get_size(), verbose, &pool);  
   ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, 0));
   unsigned col = 0;
@@ -126,7 +134,8 @@ TEST(pgmap, dump_object_stat_sum_2)
 {
   bool verbose = false;
   CheckTextTable tbl(verbose);
-  object_stat_sum_t sum;        // zero by default
+  pool_stat_t pool_stat;
+  object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default
   ASSERT_TRUE(sum.is_zero());
   // nominal amount of space available for new objects in this pool
   uint64_t avail = 0;
@@ -136,7 +145,7 @@ TEST(pgmap, dump_object_stat_sum_2)
   pool.size = 2;
   pool.type = pg_pool_t::TYPE_REPLICATED;
 
-  PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail,
+  PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
                                   pool.get_size(), verbose, &pool);  
   ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, 0));
   unsigned col = 0;