From: Brad Hubbard Date: Tue, 11 Dec 2018 04:00:35 +0000 (+1000) Subject: luminous: osd: Implement lazy omap usage statistics X-Git-Tag: v12.2.13~147^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4c2150d93bffe2b5adf3123fa594a241fe6b4d8a;p=ceph.git luminous: osd: Implement lazy omap usage statistics Opportunistic gathering of omap statistics during deep scrub. Signed-off-by: Brad Hubbard (cherry picked from commit 9e21ef0e40ff89b10dfdf857c703336496b3171f) Conflicts: doc/dev/placement-group.rst: Trivial src/osd/PGBackend.cc: Small difference in ScrubMap::object init src/osd/osd_types.cc: Differences in structure versions and members src/osd/osd_types.h: Differences in structure versions and members --- diff --git a/doc/dev/placement-group.rst b/doc/dev/placement-group.rst index 3a48636562d..36803927488 100644 --- a/doc/dev/placement-group.rst +++ b/doc/dev/placement-group.rst @@ -182,3 +182,22 @@ User-visible PG States *forced_backfill* the PG has been marked for highest priority backfill + +OMAP STATISTICS +=============== + +Omap statistics are gathered during deep scrub and displayed in the output of +the following commands:: + + ceph pg dump + ceph pg dump all + ceph pg dump summary + ceph pg dump pgs + ceph pg dump pools + ceph pg ls + +As these statistics are not updated continuously they may be quite inaccurate in +an environment where deep scrubs are run infrequently and/or there is a lot of +omap activity. As such they should not be relied on for exact accuracy but +rather used as a guide. Running a deep scrub and checking these statistics +immediately afterwards should give a good indication of current omap usage. 
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index aa623c777c4..31a3e975a54 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1775,6 +1775,8 @@ void PGMap::dump_pg_stats_plain( tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT); tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT); @@ -1814,6 +1816,8 @@ void PGMap::dump_pg_stats_plain( << st.stats.sum.num_objects_misplaced << st.stats.sum.num_objects_unfound << st.stats.sum.num_bytes + << st.stats.sum.num_omap_bytes + << st.stats.sum.num_omap_keys << st.log_size << st.ondisk_log_size << pg_state_string(st.state) @@ -1872,6 +1876,8 @@ void PGMap::dump_pool_stats(ostream& ss, bool header) const tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT); tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); } else { @@ -1884,6 +1890,8 @@ void PGMap::dump_pool_stats(ostream& ss, bool header) const tab.define_column("", TextTable::LEFT, TextTable::RIGHT); tab.define_column("", TextTable::LEFT, TextTable::RIGHT); tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); } for (auto p = pg_pool_sum.begin(); @@ -1896,6 +1904,8 @@ void 
PGMap::dump_pool_stats(ostream& ss, bool header) const << p->second.stats.sum.num_objects_misplaced << p->second.stats.sum.num_objects_unfound << p->second.stats.sum.num_bytes + << p->second.stats.sum.num_omap_bytes + << p->second.stats.sum.num_omap_keys << p->second.log_size << p->second.ondisk_log_size << TextTable::endrow; @@ -1916,6 +1926,8 @@ void PGMap::dump_pg_sum_stats(ostream& ss, bool header) const tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT); tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); } else { @@ -1928,6 +1940,8 @@ void PGMap::dump_pg_sum_stats(ostream& ss, bool header) const tab.define_column("", TextTable::LEFT, TextTable::RIGHT); tab.define_column("", TextTable::LEFT, TextTable::RIGHT); tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); }; tab << "sum" @@ -1937,6 +1951,8 @@ void PGMap::dump_pg_sum_stats(ostream& ss, bool header) const << pg_sum.stats.sum.num_objects_misplaced << pg_sum.stats.sum.num_objects_unfound << pg_sum.stats.sum.num_bytes + << pg_sum.stats.sum.num_omap_bytes + << pg_sum.stats.sum.num_omap_keys << pg_sum.log_size << pg_sum.ondisk_log_size << TextTable::endrow; @@ -2396,6 +2412,8 @@ void PGMap::dump_filtered_pg_stats(ostream& ss, set& pgs) const tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("OMAP_KEYS*", 
TextTable::LEFT, TextTable::RIGHT); tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT); @@ -2424,6 +2442,8 @@ void PGMap::dump_filtered_pg_stats(ostream& ss, set& pgs) const << st.stats.sum.num_objects_misplaced << st.stats.sum.num_objects_unfound << st.stats.sum.num_bytes + << st.stats.sum.num_omap_bytes + << st.stats.sum.num_omap_keys << st.log_size << st.ondisk_log_size << pg_state_string(st.state) @@ -3886,6 +3906,13 @@ int process_pg_map_command( string prefix = orig_prefix; map cmdmap = orig_cmdmap; + string omap_stats_note = + "\n* NOTE: Omap statistics are gathered during deep scrub and " + "may be inaccurate soon afterwards depending on utilisation. See " + "http://docs.ceph.com/docs/master/dev/placement-group/#omap-statistics " + "for further details.\n"; + bool omap_stats_note_required = false; + // perhaps these would be better in the parsing, but it's weird bool primary = false; if (prefix == "pg dump_json") { @@ -3980,10 +4007,12 @@ int process_pg_map_command( } else { if (what.count("all")) { pg_map.dump(ds); + omap_stats_note_required = true; } else if (what.count("summary") || what.count("sum")) { pg_map.dump_basic(ds); pg_map.dump_pg_sum_stats(ds, true); pg_map.dump_osd_sum_stats(ds); + omap_stats_note_required = true; } else { if (what.count("pgs_brief")) { pg_map.dump_pg_stats(ds, true); @@ -3992,15 +4021,20 @@ int process_pg_map_command( if (what.count("pgs")) { pg_map.dump_pg_stats(ds, false); header = false; + omap_stats_note_required = true; } if (what.count("pools")) { pg_map.dump_pool_stats(ds, header); + omap_stats_note_required = true; } if (what.count("osds")) { pg_map.dump_osd_stats(ds); } } odata->append(ds); + if (omap_stats_note_required) { + odata->append(omap_stats_note); + } } *ss << "dumped " << what; return 0; @@ -4054,6 +4088,7 @@ int process_pg_map_command( } else if (!pgs.empty()) { 
pg_map.dump_filtered_pg_stats(ds, pgs); odata->append(ds); + odata->append(omap_stats_note); } return 0; } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 26ccdeee574..f3c51fe0fc9 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5261,8 +5261,9 @@ void PG::scrub_compare_maps() } stringstream ss; - get_pgbackend()->be_large_omap_check(maps, master_set, - scrubber.large_omap_objects, ss); + get_pgbackend()->be_omap_checks(maps, master_set, + scrubber.omap_stats, ss); + if (!ss.str().empty()) { osd->clog->warn(ss); } @@ -5461,7 +5462,13 @@ void PG::scrub_finish() info.history.last_clean_scrub_stamp = now; info.stats.stats.sum.num_shallow_scrub_errors = scrubber.shallow_errors; info.stats.stats.sum.num_deep_scrub_errors = scrubber.deep_errors; - info.stats.stats.sum.num_large_omap_objects = scrubber.large_omap_objects; + info.stats.stats.sum.num_large_omap_objects = scrubber.omap_stats.large_omap_objects; + info.stats.stats.sum.num_omap_bytes = scrubber.omap_stats.omap_bytes; + info.stats.stats.sum.num_omap_keys = scrubber.omap_stats.omap_keys; + dout(25) << __func__ << " shard " << pg_whoami << " num_omap_bytes = " + << info.stats.stats.sum.num_omap_bytes << " num_omap_keys = " + << info.stats.stats.sum.num_omap_keys << dendl; + publish_stats_to_osd(); } else { info.stats.stats.sum.num_shallow_scrub_errors = scrubber.shallow_errors; // XXX: last_clean_scrub_stamp doesn't mean the pg is not inconsistent diff --git a/src/osd/PG.h b/src/osd/PG.h index 87de99eb0a3..f1e91cf9bd9 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1340,7 +1340,6 @@ public: set waiting_on_whom; int shallow_errors; int deep_errors; - int large_omap_objects = 0; int fixed; ScrubMap primary_scrubmap; ScrubMapBuilder primary_scrubmap_pos; @@ -1351,6 +1350,8 @@ public: OpRequestRef active_rep_scrub; utime_t scrub_reg_stamp; // stamp we registered for + omap_stat_t omap_stats = (const struct omap_stat_t){ 0 }; + // For async sleep bool sleeping = false; bool needs_sleep = true; @@ -1484,8 +1485,8 @@ 
public: subset_last_update = eversion_t(); shallow_errors = 0; deep_errors = 0; - large_omap_objects = 0; fixed = 0; + omap_stats = (const struct omap_stat_t){ 0 }; deep = false; run_callbacks(); inconsistent.clear(); diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 9b3a15c4fef..e5a86e7e032 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -1255,29 +1255,34 @@ out: } } -void PGBackend::be_large_omap_check(const map &maps, +void PGBackend::be_omap_checks(const map &maps, const set &master_set, - int& large_omap_objects, + omap_stat_t& omap_stats, ostream &warnstream) const { - bool needs_check = false; + bool needs_omap_check = false; for (const auto& map : maps) { - if (map.second->has_large_omap_object_errors) { - needs_check = true; + if (map.second->has_large_omap_object_errors || map.second->has_omap_keys) { + needs_omap_check = true; break; } } - if (!needs_check) { - return; + if (!needs_omap_check) { + return; // Nothing to do } - // Iterate through objects and check large omap object flag + // Iterate through objects and update omap stats for (const auto& k : master_set) { for (const auto& map : maps) { - ScrubMap::object& obj = map.second->objects[k]; + auto it = map.second->objects.find(k); + if (it == map.second->objects.end()) + continue; + ScrubMap::object& obj = it->second; + omap_stats.omap_bytes += obj.object_omap_bytes; + omap_stats.omap_keys += obj.object_omap_keys; if (obj.large_omap_object_found) { - large_omap_objects++; + omap_stats.large_omap_objects++; warnstream << "Large omap object found. 
Object: " << k << " Key count: " << obj.large_omap_object_key_count << " Size (bytes): " << obj.large_omap_object_value_size << '\n'; diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 7075a3ef48d..6a249c1b493 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -601,10 +601,10 @@ typedef ceph::shared_ptr OSDMapRef; ScrubMap &map, ScrubMapBuilder &pos, ScrubMap::object &o) = 0; - void be_large_omap_check( + void be_omap_checks( const map &maps, const set &master_set, - int& large_omap_objects, + omap_stat_t& omap_stats, ostream &warnstream) const; static PGBackend *build_pg_backend( diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 54a49aeaeea..03345a7951c 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -836,6 +836,15 @@ int ReplicatedBackend::be_deep_scrub( dout(20) << __func__ << " done with " << poid << " omap_digest " << std::hex << o.omap_digest << std::dec << dendl; + // Sum up omap usage + if (pos.omap_keys > 0 || pos.omap_bytes > 0) { + dout(25) << __func__ << " adding " << pos.omap_keys << " keys and " + << pos.omap_bytes << " bytes to pg_stats sums" << dendl; + map.has_omap_keys = true; + o.object_omap_bytes = pos.omap_bytes; + o.object_omap_keys = pos.omap_keys; + } + // done! 
return 0; } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 1193243c79a..edaf334e880 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1982,11 +1982,13 @@ void object_stat_sum_t::dump(Formatter *f) const f->dump_int("num_objects_pinned", num_objects_pinned); f->dump_int("num_legacy_snapsets", num_legacy_snapsets); f->dump_int("num_large_omap_objects", num_large_omap_objects); + f->dump_int("num_omap_bytes", num_omap_bytes); + f->dump_int("num_omap_keys", num_omap_keys); } void object_stat_sum_t::encode(bufferlist& bl) const { - ENCODE_START(17, 14, bl); + ENCODE_START(18, 14, bl); #if defined(CEPH_LITTLE_ENDIAN) bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t)); #else @@ -2026,6 +2028,8 @@ void object_stat_sum_t::encode(bufferlist& bl) const ::encode(num_objects_missing, bl); ::encode(num_legacy_snapsets, bl); ::encode(num_large_omap_objects, bl); + ::encode(num_omap_bytes, bl); + ::encode(num_omap_keys, bl); #endif ENCODE_FINISH(bl); } @@ -2033,9 +2037,9 @@ void object_stat_sum_t::encode(bufferlist& bl) const void object_stat_sum_t::decode(bufferlist::iterator& bl) { bool decode_finish = false; - DECODE_START(17, bl); // make sure to also update fast decode below + DECODE_START(18, bl); // make sure to also update fast decode below #if defined(CEPH_LITTLE_ENDIAN) - if (struct_v >= 17) { // this must match newest decode version + if (struct_v >= 18) { // this must match newest decode version bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes)); decode_finish = true; } @@ -2083,6 +2087,10 @@ void object_stat_sum_t::decode(bufferlist::iterator& bl) if (struct_v >= 17) { ::decode(num_large_omap_objects, bl); } + if (struct_v >= 18) { + ::decode(num_omap_bytes, bl); + ::decode(num_omap_keys, bl); + } } DECODE_FINISH(bl); } @@ -2123,6 +2131,8 @@ void object_stat_sum_t::generate_test_instances(list& o) a.num_evict_mode_full = 0; a.num_objects_pinned = 20; a.num_large_omap_objects = 5; + a.num_omap_bytes = 20000; + a.num_omap_keys 
= 200; o.push_back(new object_stat_sum_t(a)); } @@ -2164,6 +2174,8 @@ void object_stat_sum_t::add(const object_stat_sum_t& o) num_objects_pinned += o.num_objects_pinned; num_legacy_snapsets += o.num_legacy_snapsets; num_large_omap_objects += o.num_large_omap_objects; + num_omap_bytes += o.num_omap_bytes; + num_omap_keys += o.num_omap_keys; } void object_stat_sum_t::sub(const object_stat_sum_t& o) @@ -2204,6 +2216,8 @@ void object_stat_sum_t::sub(const object_stat_sum_t& o) num_objects_pinned -= o.num_objects_pinned; num_legacy_snapsets -= o.num_legacy_snapsets; num_large_omap_objects -= o.num_large_omap_objects; + num_omap_bytes -= o.num_omap_bytes; + num_omap_keys -= o.num_omap_keys; } bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r) @@ -2244,7 +2258,9 @@ bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r) l.num_evict_mode_full == r.num_evict_mode_full && l.num_objects_pinned == r.num_objects_pinned && l.num_legacy_snapsets == r.num_legacy_snapsets && - l.num_large_omap_objects == r.num_large_omap_objects; + l.num_large_omap_objects == r.num_large_omap_objects && + l.num_omap_bytes == r.num_omap_bytes && + l.num_omap_keys == r.num_omap_keys; } // -- object_stat_collection_t -- @@ -5902,7 +5918,7 @@ void ScrubMap::generate_test_instances(list& o) void ScrubMap::object::encode(bufferlist& bl) const { bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch; - ENCODE_START(9, 7, bl); + ENCODE_START(10, 7, bl); ::encode(size, bl); ::encode(negative, bl); ::encode(attrs, bl); @@ -5920,12 +5936,14 @@ void ScrubMap::object::encode(bufferlist& bl) const ::encode(large_omap_object_found, bl); ::encode(large_omap_object_key_count, bl); ::encode(large_omap_object_value_size, bl); + ::encode(object_omap_bytes, bl); + ::encode(object_omap_keys, bl); ENCODE_FINISH(bl); } void ScrubMap::object::decode(bufferlist::iterator& bl) { - DECODE_START(9, bl); + DECODE_START(10, bl); ::decode(size, bl); bool tmp, 
compat_read_error = false; ::decode(tmp, bl); @@ -5963,6 +5981,10 @@ void ScrubMap::object::decode(bufferlist::iterator& bl) ::decode(large_omap_object_key_count, bl); ::decode(large_omap_object_value_size, bl); } + if (struct_v >= 10) { + ::decode(object_omap_bytes, bl); + ::decode(object_omap_keys, bl); + } DECODE_FINISH(bl); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index a3e555cb8d7..2db47e8d238 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1680,6 +1680,8 @@ struct object_stat_sum_t { int64_t num_objects_missing; int64_t num_legacy_snapsets; ///< upper bound on pre-luminous-style SnapSets int64_t num_large_omap_objects = 0; + int64_t num_omap_bytes = 0; + int64_t num_omap_keys = 0; object_stat_sum_t() : num_bytes(0), @@ -1727,6 +1729,8 @@ struct object_stat_sum_t { FLOOR(num_wr); FLOOR(num_wr_kb); FLOOR(num_large_omap_objects); + FLOOR(num_omap_bytes); + FLOOR(num_omap_keys); FLOOR(num_shallow_scrub_errors); FLOOR(num_deep_scrub_errors); num_scrub_errors = num_shallow_scrub_errors + num_deep_scrub_errors; @@ -1788,6 +1792,8 @@ struct object_stat_sum_t { out[i].num_deep_scrub_errors; } SPLIT(num_large_omap_objects); + SPLIT(num_omap_bytes); + SPLIT(num_omap_keys); SPLIT(num_objects_recovered); SPLIT(num_bytes_recovered); SPLIT(num_keys_recovered); @@ -1843,6 +1849,8 @@ struct object_stat_sum_t { sizeof(num_wr_kb) + sizeof(num_scrub_errors) + sizeof(num_large_omap_objects) + + sizeof(num_omap_bytes) + + sizeof(num_omap_keys) + sizeof(num_objects_recovered) + sizeof(num_bytes_recovered) + sizeof(num_keys_recovered) + @@ -4961,6 +4969,8 @@ struct ScrubMap { bool large_omap_object_found:1; uint64_t large_omap_object_key_count = 0; uint64_t large_omap_object_value_size = 0; + uint64_t object_omap_bytes = 0; + uint64_t object_omap_keys = 0; object() : // Init invalid size so it won't match if we get a stat EIO error @@ -4981,6 +4991,7 @@ struct ScrubMap { eversion_t incr_since; bool has_large_omap_object_errors:1; boost::optional 
has_builtin_csum; + bool has_omap_keys:1; void merge_incr(const ScrubMap &l); void clear_from(const hobject_t& start) { @@ -5372,4 +5383,11 @@ struct store_statfs_t }; ostream &operator<<(ostream &lhs, const store_statfs_t &rhs); +// omap specific stats +struct omap_stat_t { + int large_omap_objects; + int64_t omap_bytes; + int64_t omap_keys; +}; + #endif