From ca4413dee9a148c5ffda235793b02499a3975e88 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Piotr=20Da=C5=82ek?= Date: Wed, 13 Dec 2017 15:23:55 +0100 Subject: [PATCH] osd, PG: share the snaptrimq.size() within pg_stat_t MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit That way it will be unnecessary to go through all pgs separately to find pgs with excessively long snap trim queues. And we don't need to share the snap trim queues themselves, which may be large. As snap trim queues tend to be short and anything above 50 000 I consider absurdly large, the snaptrimq_len is capped at 2^32 to save space in pg_stat_t. Signed-off-by: Piotr Dałek (cherry picked from commit 022d246f17b31d309525395831c84e9856533adb) Conflicts: src/osd/osd_types.cc - needed a fix for snaptrimq_len that was placed after two other new fields --- src/mon/PGMap.cc | 2 ++ src/osd/PG.cc | 1 + src/osd/osd_types.cc | 11 +++++++++-- src/osd/osd_types.h | 17 +++++++++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 2373691a3f74..834da03debc9 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1785,6 +1785,7 @@ void PGMap::dump_pg_stats_plain( tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); tab.define_column("LAST_DEEP_SCRUB", TextTable::LEFT, TextTable::RIGHT); tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("SNAPTRIMQ_LEN", TextTable::LEFT, TextTable::RIGHT); } for (auto i = pg_stats.begin(); @@ -1823,6 +1824,7 @@ void PGMap::dump_pg_stats_plain( << st.last_scrub_stamp << st.last_deep_scrub << st.last_deep_scrub_stamp + << st.snaptrimq_len << TextTable::endrow; } } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 139a9ac6d179..255e0d375155 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2566,6 +2566,7 @@ void PG::_update_calc_stats() info.stats.ondisk_log_size = info.stats.log_size; info.stats.log_start = pg_log.get_tail(); 
info.stats.ondisk_log_start = pg_log.get_tail(); + info.stats.snaptrimq_len = snap_trimq.size(); // If actingset is larger then upset we will have misplaced, // so we will report based on actingset size. diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index b22001af6f00..a8f50467affa 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2287,6 +2287,7 @@ void pg_stat_t::dump(Formatter *f) const f->dump_bool("hitset_stats_invalid", hitset_stats_invalid); f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid); f->dump_bool("pin_stats_invalid", pin_stats_invalid); + f->dump_unsigned("snaptrimq_len", snaptrimq_len); stats.dump(f); f->open_array_section("up"); for (vector::const_iterator p = up.begin(); p != up.end(); ++p) @@ -2322,7 +2323,7 @@ void pg_stat_t::dump_brief(Formatter *f) const void pg_stat_t::encode(bufferlist &bl) const { - ENCODE_START(22, 22, bl); + ENCODE_START(23, 22, bl); ::encode(version, bl); ::encode(reported_seq, bl); ::encode(reported_epoch, bl); @@ -2363,6 +2364,7 @@ void pg_stat_t::encode(bufferlist &bl) const ::encode(last_peered, bl); ::encode(last_became_peered, bl); ::encode(pin_stats_invalid, bl); + ::encode(snaptrimq_len, bl); ENCODE_FINISH(bl); } @@ -2416,6 +2418,9 @@ void pg_stat_t::decode(bufferlist::iterator &bl) ::decode(last_became_peered, bl); ::decode(tmp, bl); pin_stats_invalid = tmp; + if (struct_v >= 23) { + ::decode(snaptrimq_len, bl); + } DECODE_FINISH(bl); } @@ -2447,6 +2452,7 @@ void pg_stat_t::generate_test_instances(list& o) a.last_deep_scrub = eversion_t(13, 14); a.last_deep_scrub_stamp = utime_t(15, 16); a.last_clean_scrub_stamp = utime_t(17, 18); + a.snaptrimq_len = 1048576; list l; object_stat_collection_t::generate_test_instances(l); a.stats = *l.back(); @@ -2509,7 +2515,8 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r) l.hitset_bytes_stats_invalid == r.hitset_bytes_stats_invalid && l.up_primary == r.up_primary && l.acting_primary == r.acting_primary && - 
l.pin_stats_invalid == r.pin_stats_invalid; + l.pin_stats_invalid == r.pin_stats_invalid && + l.snaptrimq_len == r.snaptrimq_len; } // -- pool_stat_t -- diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index a820c8f6bfa7..b2afa4658d10 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1947,6 +1947,10 @@ struct pg_stat_t { int32_t up_primary; int32_t acting_primary; + // snaptrimq.size() is 64bit, but let's be serious - anything over 50k is + // absurd already, so keep it in 32 bits (add() saturates at 2^31) to save 4 bytes + uint32_t snaptrimq_len; + bool stats_invalid:1; /// true if num_objects_dirty is not accurate (because it was not /// maintained starting from pool creation) @@ -1966,6 +1970,7 @@ struct pg_stat_t { mapping_epoch(0), up_primary(-1), acting_primary(-1), + snaptrimq_len(0), stats_invalid(false), dirty_stats_invalid(false), omap_stats_invalid(false), @@ -1994,17 +1999,29 @@ struct pg_stat_t { log_size = f; if (ondisk_log_size < f) ondisk_log_size = f; + if (snaptrimq_len < f) + snaptrimq_len = f; } void add(const pg_stat_t& o) { stats.add(o.stats); log_size += o.log_size; ondisk_log_size += o.ondisk_log_size; + if (((uint64_t)snaptrimq_len + (uint64_t)o.snaptrimq_len) > (uint64_t)(1ull << 31)) { + snaptrimq_len = 1u << 31; + } else { + snaptrimq_len += o.snaptrimq_len; + } } void sub(const pg_stat_t& o) { stats.sub(o.stats); log_size -= o.log_size; ondisk_log_size -= o.ondisk_log_size; + if (o.snaptrimq_len < snaptrimq_len) { + snaptrimq_len -= o.snaptrimq_len; + } else { + snaptrimq_len = 0; + } } bool is_acting_osd(int32_t osd, bool primary) const; -- 2.47.3