From: Igor Fedotov Date: Mon, 11 Dec 2017 19:12:50 +0000 (+0300) Subject: osd,mon: start using per-pool statistics reported from OSD. X-Git-Tag: v14.1.0~692^2~19 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=63df4060824316476e5e022e1a923c57090c2be7;p=ceph.git osd,mon: start using per-pool statistics reported from OSD. Notable changes: * pool statistics collection has been changed to track actual allocated store space instead of the previous summation of object logical sizes. * 'ceph/rados df' now report total space (over all replicas) allocated for a pool under the "USED" column. * the "RAW USED" column has been removed from the 'ceph df detail' report Signed-off-by: Igor Fedotov --- diff --git a/src/messages/MPGStats.h b/src/messages/MPGStats.h index 3988318a5a89..341d42d45fd5 100644 --- a/src/messages/MPGStats.h +++ b/src/messages/MPGStats.h @@ -19,18 +19,21 @@ #include "messages/PaxosServiceMessage.h" class MPGStats : public MessageInstance { + static const int HEAD_VERSION = 2; + static const int COMPAT_VERSION = 1; public: friend factory; uuid_d fsid; - map pg_stat; + map pg_stat; osd_stat_t osd_stat; + map pool_stat; epoch_t epoch = 0; utime_t had_map_for; - MPGStats() : MessageInstance(MSG_PGSTATS, 0) {} + MPGStats() : MessageInstance(MSG_PGSTATS, 0, HEAD_VERSION, COMPAT_VERSION) {} MPGStats(const uuid_d& f, epoch_t e, utime_t had) - : MessageInstance(MSG_PGSTATS, 0), + : MessageInstance(MSG_PGSTATS, 0, HEAD_VERSION, COMPAT_VERSION), fsid(f), epoch(e), had_map_for(had) @@ -53,6 +56,7 @@ public: encode(pg_stat, payload); encode(epoch, payload); encode(had_map_for, payload); + encode(pool_stat, payload, features); } void decode_payload() override { auto p = payload.cbegin(); @@ -62,6 +66,8 @@ public: decode(pg_stat, p); decode(epoch, p); decode(had_map_for, p); + if (header.version >= 2) + decode(pool_stat, p); } }; diff --git a/src/mgr/ClusterState.cc b/src/mgr/ClusterState.cc index 41afe079b3fa..7e073a58bcc0 100644 --- a/src/mgr/ClusterState.cc +++ 
b/src/mgr/ClusterState.cc @@ -69,7 +69,6 @@ void ClusterState::ingest_pgstats(MPGStats *stats) std::lock_guard l(lock); const int from = stats->get_orig_source().num(); - pending_inc.update_stat(from, std::move(stats->osd_stat)); for (auto p : stats->pg_stat) { @@ -110,6 +109,9 @@ void ClusterState::ingest_pgstats(MPGStats *stats) pending_inc.pg_stat_updates[pgid] = pg_stats; } + for (auto p : stats->pool_stat) { + pending_inc.pool_statfs_updates[std::make_pair(p.first, from)] = p.second; + } } void ClusterState::update_delta_stats() @@ -128,7 +130,6 @@ void ClusterState::update_delta_stats() jf.dump_object("pending_inc", pending_inc); jf.flush(*_dout); *_dout << dendl; - pg_map.apply_incremental(g_ceph_context, pending_inc); pending_inc = PGMap::Incremental(); } diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 6632433bc71c..d6bade33066b 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -775,7 +775,9 @@ void PGMapDigest::dump_pool_stats_full( tbl.define_column("DIRTY", TextTable::LEFT, TextTable::RIGHT); tbl.define_column("READ", TextTable::LEFT, TextTable::RIGHT); tbl.define_column("WRITE", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("STORED", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("USED COMPR", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("UNDER COMPR", TextTable::LEFT, TextTable::RIGHT); } } @@ -785,6 +787,7 @@ void PGMapDigest::dump_pool_stats_full( int64_t pool_id = p->first; if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0)) continue; + const string& pool_name = osd_map.get_pool_name(pool_id); const pool_stat_t &stat = pg_pool_sum.at(pool_id); @@ -793,7 +796,6 @@ void PGMapDigest::dump_pool_stats_full( pool->get_type(), pool->get_size()); int64_t avail; - float raw_used_rate; if (avail_by_rule.count(ruleno) == 0) { // FIXME: we don't guarantee avail_space_by_rule is up-to-date before this function is invoked avail = 
get_rule_avail(ruleno); @@ -804,8 +806,6 @@ void PGMapDigest::dump_pool_stats_full( avail = avail_by_rule[ruleno]; } - raw_used_rate = ::pool_raw_used_rate(osd_map, pool_id); - if (f) { f->open_object_section("pool"); f->dump_string("name", pool_name); @@ -827,7 +827,8 @@ void PGMapDigest::dump_pool_stats_full( } } - dump_object_stat_sum(tbl, f, stat.stats.sum, avail, raw_used_rate, verbose, pool); + float raw_used_rate = ::pool_raw_used_rate(osd_map, pool_id); + dump_object_stat_sum(tbl, f, stat, avail, raw_used_rate, verbose, pool); if (f) f->close_section(); // stats else @@ -890,28 +891,34 @@ void PGMapDigest::dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) co void PGMapDigest::dump_object_stat_sum( TextTable &tbl, Formatter *f, - const object_stat_sum_t &sum, uint64_t avail, + const pool_stat_t &pool_stat, uint64_t avail, float raw_used_rate, bool verbose, const pg_pool_t *pool) { - float curr_object_copies_rate = 0.0; - if (sum.num_object_copies > 0) - curr_object_copies_rate = (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies; + const object_stat_sum_t &sum = pool_stat.stats.sum; + const store_statfs_t statfs = pool_stat.store_stats; + if (sum.num_object_copies > 0) { + raw_used_rate *= (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies; + } + float used = 0.0; // note avail passed in is raw_avail, calc raw_used here. if (avail) { - used = sum.num_bytes * raw_used_rate * curr_object_copies_rate; + used = statfs.allocated; used /= used + avail; - } else if (sum.num_bytes) { + } else if (statfs.allocated) { used = 1.0; } - + auto avail_res = raw_used_rate ? avail / raw_used_rate : 0; + // an approximation for actually stored user data + auto stored_normalized = + raw_used_rate ? 
statfs.stored / raw_used_rate : 0; if (f) { - f->dump_int("kb_used", shift_round_up(sum.num_bytes, 10)); - f->dump_int("bytes_used", sum.num_bytes); + f->dump_int("kb_used", shift_round_up(statfs.allocated, 10)); + f->dump_int("bytes_used", statfs.allocated); f->dump_float("percent_used", used); - f->dump_unsigned("max_avail", avail / raw_used_rate); + f->dump_unsigned("max_avail", avail_res); f->dump_int("objects", sum.num_objects); if (verbose) { f->dump_int("quota_objects", pool->quota_max_objects); @@ -921,18 +928,25 @@ void PGMapDigest::dump_object_stat_sum( f->dump_int("rd_bytes", sum.num_rd_kb * 1024ull); f->dump_int("wr", sum.num_wr); f->dump_int("wr_bytes", sum.num_wr_kb * 1024ull); - f->dump_int("raw_bytes_used", sum.num_bytes * raw_used_rate * curr_object_copies_rate); + f->dump_int("stored", stored_normalized); + f->dump_int("compress_bytes_used", statfs.compressed_allocated); + f->dump_int("compress_under_bytes", statfs.compressed_original); + // Stored by user amplified by replication + f->dump_int("stored_raw", statfs.stored); } } else { - tbl << stringify(byte_u_t(sum.num_bytes)); + tbl << stringify(byte_u_t(statfs.allocated)); tbl << percentify(used*100); - tbl << byte_u_t(avail / raw_used_rate); + tbl << byte_u_t(avail_res); tbl << sum.num_objects; if (verbose) { tbl << stringify(si_u_t(sum.num_objects_dirty)) << stringify(byte_u_t(sum.num_rd)) << stringify(byte_u_t(sum.num_wr)) - << stringify(byte_u_t(sum.num_bytes * raw_used_rate * curr_object_copies_rate)); + << stringify(byte_u_t(stored_normalized)) + << stringify(byte_u_t(statfs.compressed_allocated)) + << stringify(byte_u_t(statfs.compressed_original)) + ; } } } @@ -1040,6 +1054,14 @@ void PGMap::Incremental::dump(Formatter *f) const f->close_section(); } f->close_section(); + f->open_array_section("pool_statfs_updates"); + for (auto p = pool_statfs_updates.begin(); p != pool_statfs_updates.end(); ++p) { + f->open_object_section("pool_statfs"); + f->dump_stream("poolid/osd") << p->first; + 
p->second.dump(f); + f->close_section(); + } + f->close_section(); f->open_array_section("osd_stat_removals"); for (auto p = osd_stat_rm.begin(); p != osd_stat_rm.end(); ++p) @@ -1070,9 +1092,9 @@ void PGMap::Incremental::generate_test_instances(list& o) o.back()->osd_stat_updates[6] = osd_stat_t(); o.back()->pg_remove.insert(pg_t(1,2)); o.back()->osd_stat_rm.insert(5); + o.back()->pool_statfs_updates[std::make_pair(1234,4)] = store_statfs_t(); } - // -- void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) @@ -1080,27 +1102,48 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) ceph_assert(inc.version == version+1); version++; - pool_stat_t pg_sum_old = pg_sum; - mempool::pgmap::unordered_map pg_pool_sum_old; + mempool::pgmap::unordered_map pg_pool_sum_old; + pg_pool_sum_old = pg_pool_sum; for (auto p = inc.pg_stat_updates.begin(); p != inc.pg_stat_updates.end(); ++p) { const pg_t &update_pg(p->first); + auto update_pool = update_pg.pool(); const pg_stat_t &update_stat(p->second); - if (pg_pool_sum_old.count(update_pg.pool()) == 0) - pg_pool_sum_old[update_pg.pool()] = pg_pool_sum[update_pg.pool()]; - - auto t = pg_stat.find(update_pg); - if (t == pg_stat.end()) { + auto pg_stat_iter = pg_stat.find(update_pg); + pool_stat_t &pool_sum_ref = pg_pool_sum[update_pool]; + if (pg_stat_iter == pg_stat.end()) { pg_stat.insert(make_pair(update_pg, update_stat)); } else { - stat_pg_sub(update_pg, t->second); - t->second = update_stat; + stat_pg_sub(update_pg, pg_stat_iter->second); + pool_sum_ref.sub(pg_stat_iter->second); + pg_stat_iter->second = update_stat; } stat_pg_add(update_pg, update_stat); + pool_sum_ref.add(update_stat); } + + for (auto p = inc.pool_statfs_updates.begin(); + p != inc.pool_statfs_updates.end(); + ++p) { + auto update_pool = p->first.first; + auto update_osd = p->first.second; + auto& statfs_inc = p->second; + + auto pool_statfs_iter = + pool_statfs.find(std::make_pair(update_pool, update_osd)); + pool_stat_t 
&pool_sum_ref = pg_pool_sum[update_pool]; + if (pool_statfs_iter == pool_statfs.end()) { + pool_statfs.emplace(std::make_pair(update_pool, update_osd), statfs_inc); + } else { + pool_sum_ref.sub(pool_statfs_iter->second); + pool_statfs_iter->second = statfs_inc; + } + pool_sum_ref.add(statfs_inc); + } + for (auto p = inc.get_osd_stat_updates().begin(); p != inc.get_osd_stat_updates().end(); ++p) { @@ -1122,12 +1165,13 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) ++p) { const pg_t &removed_pg(*p); auto s = pg_stat.find(removed_pg); + bool pool_erased = false; if (s != pg_stat.end()) { - stat_pg_sub(removed_pg, s->second); + pool_erased = stat_pg_sub(removed_pg, s->second); pg_stat.erase(s); - } - if (removed_pg.ps() == 0) { - deleted_pools.insert(removed_pg.pool()); + if (pool_erased) { + deleted_pools.insert(removed_pg.pool()); + } } } @@ -1139,8 +1183,15 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) stat_osd_sub(t->first, t->second); osd_stat.erase(t); } + for (auto i = pool_statfs.begin(); i != pool_statfs.end(); ++i) { + if (i->first.second == *p) { + pg_pool_sum[i->first.first].sub(i->second); + pool_statfs.erase(i); + } + } } + pool_stat_t pg_sum_old = pg_sum; // skip calculating delta while sum was not synchronized if (!stamp.is_zero() && !pg_sum_old.stats.sum.is_zero()) { utime_t delta_t; @@ -1194,7 +1245,15 @@ void PGMap::calc_stats() for (auto p = pg_stat.begin(); p != pg_stat.end(); ++p) { - stat_pg_add(p->first, p->second); + auto pg = p->first; + stat_pg_add(pg, p->second); + pg_pool_sum[pg.pool()].add(p->second); + } + for (auto p = pool_statfs.begin(); + p != pool_statfs.end(); + ++p) { + auto pool = p->first.first; + pg_pool_sum[pool].add(p->second); } for (auto p = osd_stat.begin(); p != osd_stat.end(); @@ -1205,13 +1264,13 @@ void PGMap::calc_stats() void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool sameosds) { - pg_pool_sum[pgid.pool()].add(s); + auto pool = pgid.pool(); 
pg_sum.add(s); num_pg++; num_pg_by_state[s.state]++; num_pg_by_pool_state[pgid.pool()][s.state]++; - num_pg_by_pool[pgid.pool()]++; + num_pg_by_pool[pool]++; if ((s.state & PG_STATE_CREATING) && s.parent_split_bits == 0) { @@ -1251,11 +1310,10 @@ void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, } } -void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, +bool PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool sameosds) { - pool_stat_t& ps = pg_pool_sum[pgid.pool()]; - ps.sub(s); + bool pool_erased = false; pg_sum.sub(s); num_pg--; @@ -1268,9 +1326,7 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, } end = --num_pg_by_pool[pgid.pool()]; if (end == 0) { - num_pg_by_pool_state.erase(pgid.pool()); - num_pg_by_pool.erase(pgid.pool()); - pg_pool_sum.erase(pgid.pool()); + pool_erased = true; } if ((s.state & PG_STATE_CREATING) && @@ -1294,7 +1350,7 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, } if (sameosds) - return; + return pool_erased; for (auto p = s.blocked_by.begin(); p != s.blocked_by.end(); @@ -1330,6 +1386,7 @@ void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, if (it != num_pg_by_osd.end() && it->second.primary > 0) it->second.primary--; } + return pool_erased; } void PGMap::calc_purged_snaps() @@ -1382,25 +1439,27 @@ void PGMap::encode_digest(const OSDMap& osdmap, void PGMap::encode(bufferlist &bl, uint64_t features) const { - ENCODE_START(7, 7, bl); + ENCODE_START(8, 8, bl); encode(version, bl); encode(pg_stat, bl); encode(osd_stat, bl, features); encode(last_osdmap_epoch, bl); encode(last_pg_scan, bl); encode(stamp, bl); + encode(pool_statfs, bl, features); ENCODE_FINISH(bl); } void PGMap::decode(bufferlist::const_iterator &bl) { - DECODE_START(7, bl); + DECODE_START(8, bl); decode(version, bl); decode(pg_stat, bl); decode(osd_stat, bl); decode(last_osdmap_epoch, bl); decode(last_pg_scan, bl); decode(stamp, bl); + decode(pool_statfs, bl); DECODE_FINISH(bl); calc_stats(); @@ 
-2044,11 +2103,11 @@ void PGMap::update_one_pool_delta( */ void PGMap::update_pool_deltas( CephContext *cct, const utime_t ts, - const mempool::pgmap::unordered_map& pg_pool_sum_old) + const mempool::pgmap::unordered_map& pg_pool_sum_old) { for (auto it = pg_pool_sum_old.begin(); it != pg_pool_sum_old.end(); ++it) { - update_one_pool_delta(cct, ts, it->first, it->second); + update_one_pool_delta(cct, ts, (uint64_t)it->first, it->second); //FIXME: make pool type consistent!!! } } diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index 69ec40e5b71d..aaaef6b91e78 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -165,7 +165,7 @@ public: Formatter *f, bool verbose) const; void dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const; static void dump_object_stat_sum(TextTable &tbl, Formatter *f, - const object_stat_sum_t &sum, + const pool_stat_t &pool_stat, uint64_t avail, float raw_used_rate, bool verbose, const pg_pool_t *pool); @@ -230,6 +230,13 @@ public: mempool::pgmap::unordered_map osd_stat; mempool::pgmap::unordered_map pg_stat; + typedef mempool::pgmap::map< + std::pair, // + store_statfs_t> + per_osd_pool_statfs_t; + + per_osd_pool_statfs_t pool_statfs; + class Incremental { public: MEMPOOL_CLASS_HELPERS(); @@ -239,6 +246,7 @@ public: epoch_t pg_scan; // osdmap epoch mempool::pgmap::set pg_remove; utime_t stamp; + per_osd_pool_statfs_t pool_statfs_updates; private: mempool::pgmap::map osd_stat_updates; @@ -293,11 +301,20 @@ public: void update_pool_deltas( CephContext *cct, const utime_t ts, - const mempool::pgmap::unordered_map& pg_pool_sum_old); + const mempool::pgmap::unordered_map& pg_pool_sum_old); void clear_delta(); void deleted_pool(int64_t pool) { + for (auto i = pool_statfs.begin(); i != pool_statfs.end();) { + if (i->first.first == pool) { + i = pool_statfs.erase(i); + } else { + ++i; + } + } + pg_pool_sum.erase(pool); + num_pg_by_pool_state.erase(pool); num_pg_by_pool.erase(pool); per_pool_sum_deltas.erase(pool); 
per_pool_sum_deltas_stamps.erase(pool); @@ -386,7 +403,7 @@ public: void calc_stats(); void stat_pg_add(const pg_t &pgid, const pg_stat_t &s, bool sameosds=false); - void stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, + bool stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, bool sameosds=false); void calc_purged_snaps(); void stat_osd_add(int osd, const osd_stat_t &s); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index c30a2ba98274..4ac1b3890520 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7299,9 +7299,13 @@ MPGStats* OSD::collect_pg_stats() std::lock_guard lec{min_last_epoch_clean_lock}; min_last_epoch_clean = osdmap->get_epoch(); min_last_epoch_clean_pgs.clear(); + + std::set pool_set; vector pgs; _get_pgs(&pgs); for (auto& pg : pgs) { + auto pool = pg->pg_id.pgid.pool(); + pool_set.emplace((int64_t)pool); if (!pg->is_primary()) { continue; } @@ -7311,6 +7315,16 @@ MPGStats* OSD::collect_pg_stats() min_last_epoch_clean_pgs.push_back(pg->pg_id.pgid); }); } + store_statfs_t st; + for (auto p : pool_set) { + int r = store->pool_statfs(p, &st); + if (r == -ENOTSUP) { + break; + } else { + assert(r >= 0); + m->pool_stat[p] = st; + } + } return m; } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 7240e6e66d3b..2be1b862aca9 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2744,15 +2744,84 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r) l.snaptrimq_len == r.snaptrimq_len; } +// -- store_statfs_t -- + +bool store_statfs_t::operator==(const store_statfs_t& other) const +{ + return total == other.total + && available == other.available + && allocated == other.allocated + && internally_reserved == other.internally_reserved + && data_stored == other.data_stored + && data_compressed == other.data_compressed + && data_compressed_allocated == other.data_compressed_allocated + && data_compressed_original == other.data_compressed_original + && omap_allocated == other.omap_allocated + && internal_metadata == 
other.internal_metadata; +} + +void store_statfs_t::dump(Formatter *f) const +{ + f->dump_int("total", total); + f->dump_int("available", available); + f->dump_int("internally_reserved", internally_reserved); + f->dump_int("allocated", allocated); + f->dump_int("data_stored", data_stored); + f->dump_int("data_compressed", data_compressed); + f->dump_int("data_compressed_allocated", data_compressed_allocated); + f->dump_int("data_compressed_original", data_compressed_original); + f->dump_int("omap_allocated", omap_allocated); + f->dump_int("internal_metadata", internal_metadata); +} + +ostream& operator<<(ostream& out, const store_statfs_t &s) +{ + out << std::hex + << "store_statfs(0x" << s.available + << "/0x" << s.internally_reserved + << "/0x" << s.total + << ", data 0x" << s.data_stored + << "/0x" << s.allocated + << ", compress 0x" << s.data_compressed + << "/0x" << s.data_compressed_allocated + << "/0x" << s.data_compressed_original + << ", omap 0x" << s.omap_allocated + << ", meta 0x" << s.internal_metadata + << std::dec + << ")"; + return out; +} + +void store_statfs_t::generate_test_instances(list& o) +{ + store_statfs_t a; + o.push_back(new store_statfs_t(a)); + a.total = 234; + a.available = 123; + a.internally_reserved = 33; + a.allocated = 32; + a.data_stored = 44; + a.data_compressed = 21; + a.data_compressed_allocated = 12; + a.data_compressed_original = 13; + a.omap_allocated = 14; + a.internal_metadata = 15; + o.push_back(new store_statfs_t(a)); +} + // -- pool_stat_t -- void pool_stat_t::dump(Formatter *f) const { stats.dump(f); + f->open_object_section("store_stats"); + store_stats.dump(f); + f->close_section(); f->dump_int("log_size", log_size); f->dump_int("ondisk_log_size", ondisk_log_size); f->dump_int("up", up); f->dump_int("acting", acting); + f->dump_int("num_store_stats", acting); } void pool_stat_t::encode(bufferlist &bl, uint64_t features) const @@ -2767,18 +2836,20 @@ void pool_stat_t::encode(bufferlist &bl, uint64_t features) const 
return; } - ENCODE_START(6, 5, bl); + ENCODE_START(7, 5, bl); encode(stats, bl); encode(log_size, bl); encode(ondisk_log_size, bl); encode(up, bl); encode(acting, bl); + encode(store_stats, bl); + encode(num_store_stats, bl); ENCODE_FINISH(bl); } void pool_stat_t::decode(bufferlist::const_iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); if (struct_v >= 4) { decode(stats, bl); decode(log_size, bl); @@ -2790,6 +2861,14 @@ void pool_stat_t::decode(bufferlist::const_iterator &bl) up = 0; acting = 0; } + if (struct_v >= 7) { + decode(store_stats, bl); + decode(num_store_stats, bl); + } else { + store_stats.reset(); + num_store_stats = 0; + } + } else { decode(stats.sum.num_bytes, bl); uint64_t num_kb; @@ -2821,11 +2900,15 @@ void pool_stat_t::generate_test_instances(list& o) list l; object_stat_collection_t::generate_test_instances(l); + list ll; + store_statfs_t::generate_test_instances(ll); a.stats = *l.back(); + a.store_stats = *ll.back(); a.log_size = 123; a.ondisk_log_size = 456; a.acting = 3; a.up = 4; + a.num_store_stats = 1; o.push_back(new pool_stat_t(a)); } @@ -6136,49 +6219,6 @@ void OSDOp::merge_osd_op_vector_out_data(vector& ops, bufferlist& out) } } -bool store_statfs_t::operator==(const store_statfs_t& other) const -{ - return total == other.total - && available == other.available - && allocated == other.allocated - && data_stored == other.data_stored - && data_compressed == other.data_compressed - && data_compressed_allocated == other.data_compressed_allocated - && data_compressed_original == other.data_compressed_original - && omap_allocated == other.omap_allocated - && internal_metadata == other.internal_metadata; -} - -void store_statfs_t::dump(Formatter *f) const -{ - f->dump_int("total", total); - f->dump_int("available", available); - f->dump_int("allocated", allocated); - f->dump_int("data_stored", data_stored); - f->dump_int("data_compressed", data_compressed); - 
f->dump_int("data_compressed_allocated", data_compressed_allocated); - f->dump_int("data_compressed_original", data_compressed_original); - f->dump_int("omap_allocated", omap_allocated); - f->dump_int("internal_metadata", internal_metadata); -} - -ostream& operator<<(ostream& out, const store_statfs_t &s) -{ - out << std::hex - << "store_statfs(0x" << s.available - << "/0x" << s.total - << ", data 0x" << s.data_stored - << "/0x" << s.allocated - << ", compress 0x" << s.data_compressed - << "/0x" << s.data_compressed_allocated - << "/0x" << s.data_compressed_original - << ", omap 0x" << s.omap_allocated - << ", meta 0x" << s.internal_metadata - << std::dec - << ")"; - return out; -} - void OSDOp::clear_data(vector& ops) { for (unsigned i = 0; i < ops.size(); i++) { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index f69a71f4b86d..17b92812ebd5 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2186,21 +2186,112 @@ WRITE_CLASS_ENCODER(pg_stat_t) bool operator==(const pg_stat_t& l, const pg_stat_t& r); +/** store_statfs_t + * ObjectStore full statfs information + */ +struct store_statfs_t +{ + uint64_t total = 0; ///< Total bytes + uint64_t available = 0; ///< Free bytes available + uint64_t internally_reserved = 0; ///< Bytes reserved for internal purposes + + int64_t allocated = 0; ///< Bytes allocated by the store + + int64_t data_stored = 0; ///< Bytes actually stored by the user + int64_t data_compressed = 0; ///< Bytes stored after compression + int64_t data_compressed_allocated = 0; ///< Bytes allocated for compressed data + int64_t data_compressed_original = 0; ///< Bytes that were compressed + + int64_t omap_allocated = 0; ///< approx usage of omap data + int64_t internal_metadata = 0; ///< approx usage of internal metadata + + void reset() { + *this = store_statfs_t(); + } + void floor(int64_t f) { +#define FLOOR(x) if (int64_t(x) < f) x = f + FLOOR(total); + FLOOR(available); + FLOOR(internally_reserved); + FLOOR(allocated); + 
FLOOR(data_stored); + FLOOR(data_compressed); + FLOOR(data_compressed_allocated); + FLOOR(data_compressed_original); + + FLOOR(omap_allocated); + FLOOR(internal_metadata); +#undef FLOOR + } + + bool operator ==(const store_statfs_t& other) const; + bool is_zero() const { + return *this == store_statfs_t(); + } + void add(const store_statfs_t& o) { + total += o.total; + available += o.available; + internally_reserved += o.internally_reserved; + allocated += o.allocated; + data_stored += o.data_stored; + data_compressed += o.data_compressed; + data_compressed_allocated += o.data_compressed_allocated; + data_compressed_original += o.data_compressed_original; + omap_allocated += o.omap_allocated; + internal_metadata += o.internal_metadata; + } + void sub(const store_statfs_t& o) { + total -= o.total; + available -= o.available; + internally_reserved -= o.internally_reserved; + allocated -= o.allocated; + data_stored -= o.data_stored; + data_compressed -= o.data_compressed; + data_compressed_allocated -= o.data_compressed_allocated; + data_compressed_original -= o.data_compressed_original; + omap_allocated -= o.omap_allocated; + internal_metadata -= o.internal_metadata; + } + void dump(Formatter *f) const; + DENC(store_statfs_t, v, p) { + DENC_START(1, 1, p); + denc(v.total, p); + denc(v.available, p); + denc(v.internally_reserved, p); + denc(v.allocated, p); + denc(v.data_stored, p); + denc(v.data_compressed, p); + denc(v.data_compressed_allocated, p); + denc(v.data_compressed_original, p); + denc(v.omap_allocated, p); + denc(v.internal_metadata, p); + DENC_FINISH(p); + } + static void generate_test_instances(list& o); +}; +WRITE_CLASS_DENC(store_statfs_t) + +ostream &operator<<(ostream &lhs, const store_statfs_t &rhs); + /* * summation over an entire pool */ struct pool_stat_t { object_stat_collection_t stats; + store_statfs_t store_stats; int64_t log_size; int64_t ondisk_log_size; // >= active_log_size int32_t up; ///< number of up replicas or shards int32_t acting; 
///< number of acting replicas or shards + int32_t num_store_stats; ///< amount of store_stats accumulated - pool_stat_t() : log_size(0), ondisk_log_size(0), up(0), acting(0) + pool_stat_t() : log_size(0), ondisk_log_size(0), up(0), acting(0), + num_store_stats(0) { } void floor(int64_t f) { stats.floor(f); + store_stats.floor(f); if (log_size < f) log_size = f; if (ondisk_log_size < f) @@ -2209,6 +2300,17 @@ struct pool_stat_t { up = f; if (acting < f) acting = f; + if (num_store_stats < f) + num_store_stats = f; + } + + void add(const store_statfs_t& o) { + store_stats.add(o); + ++num_store_stats; + } + void sub(const store_statfs_t& o) { + store_stats.sub(o); + --num_store_stats; } void add(const pg_stat_t& o) { @@ -2228,10 +2330,39 @@ struct pool_stat_t { bool is_zero() const { return (stats.is_zero() && + store_stats.is_zero() && log_size == 0 && ondisk_log_size == 0 && up == 0 && - acting == 0); + acting == 0 && + num_store_stats == 0); + } + + // helper accessors to retrieve used/netto bytes depending on the + // collection method: new per-pool objectstore report or legacy PG + // summation at OSD. + // In legacy mode used and netto values are the same. But for new per-pool + // collection 'used' provides amount of space ALLOCATED at all related OSDs + // and 'netto' is amount of stored user data. + uint64_t get_allocated_bytes() const { + uint64_t allocated_bytes; + if (num_store_stats) { + allocated_bytes = store_stats.allocated; + } else { + // legacy mode, use numbers from 'stats' + allocated_bytes = stats.sum.num_bytes; + } + return allocated_bytes; + } + uint64_t get_user_bytes(float raw_used_rate) const { + uint64_t user_bytes; + if (num_store_stats) { + user_bytes = raw_used_rate ? 
store_stats.data_stored / raw_used_rate : 0; + } else { + // legacy mode, use numbers from 'stats' + user_bytes = stats.sum.num_bytes; + } + return user_bytes; } void dump(Formatter *f) const; @@ -5508,33 +5639,6 @@ struct PromoteCounter { } }; -/** store_statfs_t - * ObjectStore full statfs information - */ -struct store_statfs_t -{ - uint64_t total = 0; ///< Total bytes - uint64_t available = 0; ///< Free bytes available - - int64_t allocated = 0; ///< Bytes allocated by the store - - int64_t data_stored = 0; ///< Bytes actually stored by the user - int64_t data_compressed = 0; ///< Bytes stored after compression - int64_t data_compressed_allocated = 0; ///< Bytes allocated for compressed data - int64_t data_compressed_original = 0; ///< Bytes that were compressed - - int64_t omap_allocated = 0; ///< approx usage of omap data - int64_t internal_metadata = 0; ///< approx usage of internal metadata - - void reset() { - *this = store_statfs_t(); - } - bool operator ==(const store_statfs_t& other) const; - void dump(Formatter *f) const; -}; -ostream &operator<<(ostream &lhs, const store_statfs_t &rhs); - - struct pool_pg_num_history_t { /// last epoch updated epoch_t epoch = 0; diff --git a/src/test/mon/PGMap.cc b/src/test/mon/PGMap.cc index 2d6fd4522f17..190478ec8abd 100644 --- a/src/test/mon/PGMap.cc +++ b/src/test/mon/PGMap.cc @@ -25,7 +25,7 @@ namespace { define_column("", TextTable::LEFT, TextTable::LEFT); } if (verbose) { - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 6; i++) { define_column("", TextTable::LEFT, TextTable::LEFT); } } @@ -54,7 +54,8 @@ TEST(pgmap, dump_object_stat_sum_0) { bool verbose = true; CheckTextTable tbl(verbose); - object_stat_sum_t sum; + pool_stat_t pool_stat; + object_stat_sum_t& sum = pool_stat.stats.sum; sum.num_bytes = 42 * 1024 * 1024; sum.num_objects = 42; sum.num_objects_degraded = 13; // there are 13 missings + not_yet_backfilled @@ -63,8 +64,13 @@ TEST(pgmap, dump_object_stat_sum_0) sum.num_rd_kb = 123; sum.num_wr = 
101; sum.num_wr_kb = 321; + store_statfs_t &statfs = pool_stat.store_stats; + statfs.data_stored = 42 * 1024 * 1024 - 5; + statfs.allocated = 42 * 1024 * 1024 * 2; + statfs.data_compressed_allocated = 4334; + statfs.data_compressed_original = 1213; - sum.calc_copies(3); // assuming we have 3 copies for each obj + sum.calc_copies(3); // assuming we have 3 copies for each obj // nominal amount of space available for new objects in this pool uint64_t avail = 2016 * 1024 * 1024; pg_pool_t pool; @@ -72,25 +78,26 @@ TEST(pgmap, dump_object_stat_sum_0) pool.quota_max_bytes = 2000 * 1024 * 1024; pool.size = 2; pool.type = pg_pool_t::TYPE_REPLICATED; - PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail, + PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail, pool.get_size(), verbose, &pool); - ASSERT_EQ(stringify(byte_u_t(sum.num_bytes)), tbl.get(0, 0)); float copies_rate = (static_cast(sum.num_object_copies - sum.num_objects_degraded) / - sum.num_object_copies); - float used_bytes = sum.num_bytes * copies_rate * pool.get_size(); - float used_percent = used_bytes / (used_bytes + avail) * 100; + sum.num_object_copies) * pool.get_size(); + float used_percent = (float)statfs.allocated / + (statfs.allocated + avail) * 100; + uint64_t stored = statfs.data_stored / copies_rate; + unsigned col = 0; - ASSERT_EQ(stringify(byte_u_t(sum.num_bytes)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(statfs.allocated)), tbl.get(0, col++)); ASSERT_EQ(percentify(used_percent), tbl.get(0, col++)); - ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(avail/copies_rate)), tbl.get(0, col++)); ASSERT_EQ(stringify(sum.num_objects), tbl.get(0, col++)); ASSERT_EQ(stringify(si_u_t(sum.num_objects_dirty)), tbl.get(0, col++)); ASSERT_EQ(stringify(byte_u_t(sum.num_rd)), tbl.get(0, col++)); ASSERT_EQ(stringify(byte_u_t(sum.num_wr)), tbl.get(0, col++)); - // we can use pool.size for raw_used_rate if it is a replica pool - uint64_t raw_bytes_used 
= sum.num_bytes * pool.get_size() * copies_rate; - ASSERT_EQ(stringify(byte_u_t(raw_bytes_used)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(stored)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_allocated)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_original)), tbl.get(0, col++)); } // with table, without formatter, verbose = true, empty, avail > 0 @@ -98,7 +105,8 @@ TEST(pgmap, dump_object_stat_sum_1) { bool verbose = true; CheckTextTable tbl(verbose); - object_stat_sum_t sum; // zero by default + pool_stat_t pool_stat; + object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default ASSERT_TRUE(sum.is_zero()); // nominal amount of space available for new objects in this pool uint64_t avail = 2016 * 1024 * 1024; @@ -107,7 +115,7 @@ TEST(pgmap, dump_object_stat_sum_1) pool.quota_max_bytes = 2000 * 1024 * 1024; pool.size = 2; pool.type = pg_pool_t::TYPE_REPLICATED; - PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail, + PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail, pool.get_size(), verbose, &pool); ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, 0)); unsigned col = 0; @@ -126,7 +134,8 @@ TEST(pgmap, dump_object_stat_sum_2) { bool verbose = false; CheckTextTable tbl(verbose); - object_stat_sum_t sum; // zero by default + pool_stat_t pool_stat; + object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default ASSERT_TRUE(sum.is_zero()); // nominal amount of space available for new objects in this pool uint64_t avail = 0; @@ -136,7 +145,7 @@ TEST(pgmap, dump_object_stat_sum_2) pool.size = 2; pool.type = pg_pool_t::TYPE_REPLICATED; - PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail, + PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail, pool.get_size(), verbose, &pool); ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, 0)); unsigned col = 0;