From: Piotr Dałek Date: Wed, 13 Dec 2017 14:23:55 +0000 (+0100) Subject: osd, PG: share the snaptrimq.size() within pg_stat_t X-Git-Tag: v13.0.2~572^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=022d246f17b31d309525395831c84e9856533adb;p=ceph.git osd, PG: share the snaptrimq.size() within pg_stat_t That way it will be unnecessary to go through all pgs separately to find pgs with excessively long snap trim queues. And we don't need to share the snap trim queues themselves, which may be large. As snap trim queues tend to be short, and I consider anything above 50 000 absurdly large, snaptrimq_len is capped at 2^32 to save space in pg_stat_t. Signed-off-by: Piotr Dałek --- diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index c5107af996c7..4afa37826a9e 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1479,6 +1479,7 @@ void PGMap::dump_pg_stats_plain( tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); tab.define_column("LAST_DEEP_SCRUB", TextTable::LEFT, TextTable::RIGHT); tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("SNAPTRIMQ_LEN", TextTable::LEFT, TextTable::RIGHT); } for (auto i = pg_stats.begin(); @@ -1517,6 +1518,7 @@ void PGMap::dump_pg_stats_plain( << st.last_scrub_stamp << st.last_deep_scrub << st.last_deep_scrub_stamp + << st.snaptrimq_len << TextTable::endrow; } } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 402a9d7c65b9..19b3ac6a92e8 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2668,6 +2668,7 @@ void PG::_update_calc_stats() info.stats.ondisk_log_size = info.stats.log_size; info.stats.log_start = pg_log.get_tail(); info.stats.ondisk_log_start = pg_log.get_tail(); + info.stats.snaptrimq_len = snap_trimq.size(); // If actingset is larger then upset we will have misplaced, // so we will report based on actingset size. 
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index e8599f92197a..d0031ff7783d 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2319,6 +2319,7 @@ void pg_stat_t::dump(Formatter *f) const f->dump_bool("hitset_stats_invalid", hitset_stats_invalid); f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid); f->dump_bool("pin_stats_invalid", pin_stats_invalid); + f->dump_unsigned("snaptrimq_len", snaptrimq_len); stats.dump(f); f->open_array_section("up"); for (vector::const_iterator p = up.begin(); p != up.end(); ++p) @@ -2364,7 +2365,7 @@ void pg_stat_t::dump_brief(Formatter *f) const void pg_stat_t::encode(bufferlist &bl) const { - ENCODE_START(24, 22, bl); + ENCODE_START(25, 22, bl); ::encode(version, bl); ::encode(reported_seq, bl); ::encode(reported_epoch, bl); @@ -2407,6 +2408,7 @@ void pg_stat_t::encode(bufferlist &bl) const ::encode(pin_stats_invalid, bl); ::encode(state, bl); ::encode(purged_snaps, bl); + ::encode(snaptrimq_len, bl); ENCODE_FINISH(bl); } @@ -2468,6 +2470,9 @@ void pg_stat_t::decode(bufferlist::iterator &bl) } if (struct_v >= 24) { ::decode(purged_snaps, bl); + if (struct_v >= 25) { + ::decode(snaptrimq_len, bl); + } } DECODE_FINISH(bl); } @@ -2500,6 +2505,7 @@ void pg_stat_t::generate_test_instances(list& o) a.last_deep_scrub = eversion_t(13, 14); a.last_deep_scrub_stamp = utime_t(15, 16); a.last_clean_scrub_stamp = utime_t(17, 18); + a.snaptrimq_len = 1048576; list l; object_stat_collection_t::generate_test_instances(l); a.stats = *l.back(); @@ -2563,7 +2569,8 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r) l.up_primary == r.up_primary && l.acting_primary == r.acting_primary && l.pin_stats_invalid == r.pin_stats_invalid && - l.purged_snaps == r.purged_snaps; + l.purged_snaps == r.purged_snaps && + l.snaptrimq_len == r.snaptrimq_len; } // -- pool_stat_t -- diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index bc491dea2592..d3d866019ae4 100644 --- a/src/osd/osd_types.h +++ 
b/src/osd/osd_types.h
@@ -1963,6 +1963,10 @@ struct pg_stat_t {
   int32_t up_primary;
   int32_t acting_primary;
 
+  // snaptrimq.size() is 64bit, but let's be serious - anything over 50k is
+  // absurd already, so cap it to 2^32 and save 4 bytes at the same time
+  uint32_t snaptrimq_len;
+
   bool stats_invalid:1;
   /// true if num_objects_dirty is not accurate (because it was not
   /// maintained starting from pool creation)
@@ -1982,6 +1986,7 @@ struct pg_stat_t {
     mapping_epoch(0),
     up_primary(-1),
     acting_primary(-1),
+    snaptrimq_len(0),
     stats_invalid(false),
     dirty_stats_invalid(false),
     omap_stats_invalid(false),
@@ -2010,17 +2015,31 @@ struct pg_stat_t {
       log_size = f;
     if (ondisk_log_size < f)
       ondisk_log_size = f;
+    if (snaptrimq_len < f)
+      snaptrimq_len = f;
   }
 
   void add(const pg_stat_t& o) {
     stats.add(o.stats);
     log_size += o.log_size;
     ondisk_log_size += o.ondisk_log_size;
+    // Saturate at 2^31 instead of wrapping. The limit must come from an unsigned
+    // literal: (uint64_t)(1 << 31) sign-extends and the check would never fire.
+    if (((uint64_t)snaptrimq_len + (uint64_t)o.snaptrimq_len) > (uint64_t)(1ull << 31)) {
+      snaptrimq_len = 1u << 31;
+    } else {
+      snaptrimq_len += o.snaptrimq_len;
+    }
   }
   void sub(const pg_stat_t& o) {
     stats.sub(o.stats);
     log_size -= o.log_size;
     ondisk_log_size -= o.ondisk_log_size;
+    if (o.snaptrimq_len < snaptrimq_len) {
+      snaptrimq_len -= o.snaptrimq_len;
+    } else {
+      snaptrimq_len = 0;
+    }
   }
 
   bool is_acting_osd(int32_t osd, bool primary) const;