Opportunistic gathering of omap statistics during deep scrub.
Signed-off-by: Brad Hubbard <bhubbard@redhat.com>
happens when pg_num_pending < pg_num, and applies to the PGs with
pg_num_pending <= ps < pg_num as well as the corresponding peer PG
that it is merging with.
+
+
+OMAP STATISTICS
+===============
+
+Omap statistics are gathered during deep scrub and displayed in the output of
+the following commands::
+
+ ceph pg dump
+ ceph pg dump all
+ ceph pg dump summary
+ ceph pg dump pgs
+ ceph pg dump pools
+ ceph pg ls
+
+As these statistics are not updated continuously, they may be quite inaccurate
+in an environment where deep scrubs are run infrequently and/or there is a lot
+of omap activity. As such, they should not be relied on for exact accuracy but
+rather used as a guide. Running a deep scrub and checking these statistics
+immediately afterwards should give a good indication of current omap usage.
tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT);
<< st.stats.sum.num_objects_misplaced
<< st.stats.sum.num_objects_unfound
<< st.stats.sum.num_bytes
+ << st.stats.sum.num_omap_bytes
+ << st.stats.sum.num_omap_keys
<< st.log_size
<< st.ondisk_log_size
<< pg_state_string(st.state)
tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT);
} else {
tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
}
for (auto p = pg_pool_sum.begin();
<< p->second.stats.sum.num_objects_misplaced
<< p->second.stats.sum.num_objects_unfound
<< p->second.stats.sum.num_bytes
+ << p->second.stats.sum.num_omap_bytes
+ << p->second.stats.sum.num_omap_keys
<< p->second.log_size
<< p->second.ondisk_log_size
<< TextTable::endrow;
tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT);
} else {
tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
};
tab << "sum"
<< pg_sum.stats.sum.num_objects_misplaced
<< pg_sum.stats.sum.num_objects_unfound
<< pg_sum.stats.sum.num_bytes
+ << pg_sum.stats.sum.num_omap_bytes
+ << pg_sum.stats.sum.num_omap_keys
<< pg_sum.log_size
<< pg_sum.ondisk_log_size
<< TextTable::endrow;
tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
+ tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SINCE", TextTable::LEFT, TextTable::RIGHT);
<< st.stats.sum.num_objects_misplaced
<< st.stats.sum.num_objects_unfound
<< st.stats.sum.num_bytes
+ << st.stats.sum.num_omap_bytes
+ << st.stats.sum.num_omap_keys
<< st.log_size
<< pg_state_string(st.state)
<< utimespan_str(now - st.last_change)
string prefix = orig_prefix;
auto cmdmap = orig_cmdmap;
+ string omap_stats_note =
+ "\n* NOTE: Omap statistics are gathered during deep scrub and "
+ "may be inaccurate soon afterwards depending on utilisation. See "
+ "http://docs.ceph.com/docs/master/dev/placement-group/#omap-statistics "
+ "for further details.\n";
+ bool omap_stats_note_required = false;
+
// perhaps these would be better in the parsing, but it's weird
bool primary = false;
if (prefix == "pg dump_json") {
} else {
if (what.count("all")) {
pg_map.dump(ds);
+ omap_stats_note_required = true;
} else if (what.count("summary") || what.count("sum")) {
pg_map.dump_basic(ds);
pg_map.dump_pg_sum_stats(ds, true);
pg_map.dump_osd_sum_stats(ds);
+ omap_stats_note_required = true;
} else {
if (what.count("pgs_brief")) {
pg_map.dump_pg_stats(ds, true);
if (what.count("pgs")) {
pg_map.dump_pg_stats(ds, false);
header = false;
+ omap_stats_note_required = true;
}
if (what.count("pools")) {
pg_map.dump_pool_stats(ds, header);
+ omap_stats_note_required = true;
}
if (what.count("osds")) {
pg_map.dump_osd_stats(ds);
}
}
odata->append(ds);
+ if (omap_stats_note_required) {
+ odata->append(omap_stats_note);
+ }
}
*ss << "dumped " << what;
return 0;
} else if (!pgs.empty()) {
pg_map.dump_filtered_pg_stats(ds, pgs);
odata->append(ds);
+ odata->append(omap_stats_note);
}
return 0;
}
}
stringstream ss;
- get_pgbackend()->be_large_omap_check(maps, master_set,
- scrubber.large_omap_objects, ss);
+ get_pgbackend()->be_omap_checks(maps, master_set,
+ scrubber.omap_stats, ss);
+
if (!ss.str().empty()) {
osd->clog->warn(ss);
}
info.history.last_clean_scrub_stamp = now;
info.stats.stats.sum.num_shallow_scrub_errors = scrubber.shallow_errors;
info.stats.stats.sum.num_deep_scrub_errors = scrubber.deep_errors;
- info.stats.stats.sum.num_large_omap_objects = scrubber.large_omap_objects;
+ info.stats.stats.sum.num_large_omap_objects = scrubber.omap_stats.large_omap_objects;
+ info.stats.stats.sum.num_omap_bytes = scrubber.omap_stats.omap_bytes;
+ info.stats.stats.sum.num_omap_keys = scrubber.omap_stats.omap_keys;
+ dout(25) << __func__ << " shard " << pg_whoami << " num_omap_bytes = "
+ << info.stats.stats.sum.num_omap_bytes << " num_omap_keys = "
+ << info.stats.stats.sum.num_omap_keys << dendl;
+ publish_stats_to_osd();
} else {
info.stats.stats.sum.num_shallow_scrub_errors = scrubber.shallow_errors;
// XXX: last_clean_scrub_stamp doesn't mean the pg is not inconsistent
set<pg_shard_t> waiting_on_whom;
int shallow_errors;
int deep_errors;
- int large_omap_objects = 0;
int fixed;
ScrubMap primary_scrubmap;
ScrubMapBuilder primary_scrubmap_pos;
OpRequestRef active_rep_scrub;
utime_t scrub_reg_stamp; // stamp we registered for
+ omap_stat_t omap_stats = (const struct omap_stat_t){ 0 };
+
// For async sleep
bool sleeping = false;
bool needs_sleep = true;
subset_last_update = eversion_t();
shallow_errors = 0;
deep_errors = 0;
- large_omap_objects = 0;
fixed = 0;
+ omap_stats = (const struct omap_stat_t){ 0 };
deep = false;
run_callbacks();
inconsistent.clear();
}
}
-void PGBackend::be_large_omap_check(const map<pg_shard_t,ScrubMap*> &maps,
+void PGBackend::be_omap_checks(const map<pg_shard_t,ScrubMap*> &maps,
const set<hobject_t> &master_set,
- int& large_omap_objects,
+ omap_stat_t& omap_stats,
ostream &warnstream) const
{
- bool needs_check = false;
+ bool needs_omap_check = false;
for (const auto& map : maps) {
- if (map.second->has_large_omap_object_errors) {
- needs_check = true;
+ if (map.second->has_large_omap_object_errors || map.second->has_omap_keys) {
+ needs_omap_check = true;
break;
}
}
- if (!needs_check) {
- return;
+ if (!needs_omap_check) {
+ return; // Nothing to do
}
- // Iterate through objects and check large omap object flag
+ // Iterate through objects and update omap stats
for (const auto& k : master_set) {
for (const auto& map : maps) {
auto it = map.second->objects.find(k);
if (it == map.second->objects.end())
continue;
ScrubMap::object& obj = it->second;
+ omap_stats.omap_bytes += obj.object_omap_bytes;
+ omap_stats.omap_keys += obj.object_omap_keys;
if (obj.large_omap_object_found) {
- large_omap_objects++;
+ omap_stats.large_omap_objects++;
warnstream << "Large omap object found. Object: " << k << " Key count: "
<< obj.large_omap_object_key_count << " Size (bytes): "
<< obj.large_omap_object_value_size << '\n';
ScrubMap &map,
ScrubMapBuilder &pos,
ScrubMap::object &o) = 0;
- void be_large_omap_check(
+ void be_omap_checks(
const map<pg_shard_t,ScrubMap*> &maps,
const set<hobject_t> &master_set,
- int& large_omap_objects,
+ omap_stat_t& omap_stats,
ostream &warnstream) const;
static PGBackend *build_pg_backend(
dout(20) << __func__ << " done with " << poid << " omap_digest "
<< std::hex << o.omap_digest << std::dec << dendl;
+ // Sum up omap usage
+ if (pos.omap_keys > 0 || pos.omap_bytes > 0) {
+ dout(25) << __func__ << " adding " << pos.omap_keys << " keys and "
+ << pos.omap_bytes << " bytes to pg_stats sums" << dendl;
+ map.has_omap_keys = true;
+ o.object_omap_bytes = pos.omap_bytes;
+ o.object_omap_keys = pos.omap_keys;
+ }
+
// done!
return 0;
}
f->dump_int("num_legacy_snapsets", num_legacy_snapsets);
f->dump_int("num_large_omap_objects", num_large_omap_objects);
f->dump_int("num_objects_manifest", num_objects_manifest);
+ f->dump_int("num_omap_bytes", num_omap_bytes);
+ f->dump_int("num_omap_keys", num_omap_keys);
}
void object_stat_sum_t::encode(bufferlist& bl) const
{
- ENCODE_START(18, 14, bl);
+ ENCODE_START(19, 14, bl);
#if defined(CEPH_LITTLE_ENDIAN)
bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
#else
encode(num_legacy_snapsets, bl);
encode(num_large_omap_objects, bl);
encode(num_objects_manifest, bl);
+ encode(num_omap_bytes, bl);
+ encode(num_omap_keys, bl);
#endif
ENCODE_FINISH(bl);
}
void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
{
bool decode_finish = false;
- DECODE_START(18, bl); // make sure to also update fast decode below
+ DECODE_START(19, bl); // make sure to also update fast decode below
#if defined(CEPH_LITTLE_ENDIAN)
- if (struct_v >= 18) { // this must match newest decode version
+ if (struct_v >= 19) { // this must match newest decode version
bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
decode_finish = true;
}
if (struct_v >= 18) {
decode(num_objects_manifest, bl);
}
+ if (struct_v >= 19) {
+ decode(num_omap_bytes, bl);
+ decode(num_omap_keys, bl);
+ }
}
DECODE_FINISH(bl);
}
a.num_objects_pinned = 20;
a.num_large_omap_objects = 5;
a.num_objects_manifest = 2;
+ a.num_omap_bytes = 20000;
+ a.num_omap_keys = 200;
o.push_back(new object_stat_sum_t(a));
}
num_legacy_snapsets += o.num_legacy_snapsets;
num_large_omap_objects += o.num_large_omap_objects;
num_objects_manifest += o.num_objects_manifest;
+ num_omap_bytes += o.num_omap_bytes;
+ num_omap_keys += o.num_omap_keys;
}
void object_stat_sum_t::sub(const object_stat_sum_t& o)
num_legacy_snapsets -= o.num_legacy_snapsets;
num_large_omap_objects -= o.num_large_omap_objects;
num_objects_manifest -= o.num_objects_manifest;
+ num_omap_bytes -= o.num_omap_bytes;
+ num_omap_keys -= o.num_omap_keys;
}
bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
l.num_objects_pinned == r.num_objects_pinned &&
l.num_legacy_snapsets == r.num_legacy_snapsets &&
l.num_large_omap_objects == r.num_large_omap_objects &&
- l.num_objects_manifest == r.num_objects_manifest;
+ l.num_objects_manifest == r.num_objects_manifest &&
+ l.num_omap_bytes == r.num_omap_bytes &&
+ l.num_omap_keys == r.num_omap_keys;
}
// -- object_stat_collection_t --
void ScrubMap::object::encode(bufferlist& bl) const
{
bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
- ENCODE_START(9, 7, bl);
+ ENCODE_START(10, 7, bl);
encode(size, bl);
encode(negative, bl);
encode(attrs, bl);
encode(large_omap_object_found, bl);
encode(large_omap_object_key_count, bl);
encode(large_omap_object_value_size, bl);
+ encode(object_omap_bytes, bl);
+ encode(object_omap_keys, bl);
ENCODE_FINISH(bl);
}
void ScrubMap::object::decode(bufferlist::const_iterator& bl)
{
- DECODE_START(9, bl);
+ DECODE_START(10, bl);
decode(size, bl);
bool tmp, compat_read_error = false;
decode(tmp, bl);
decode(large_omap_object_key_count, bl);
decode(large_omap_object_value_size, bl);
}
+ if (struct_v >= 10) {
+ decode(object_omap_bytes, bl);
+ decode(object_omap_keys, bl);
+ }
DECODE_FINISH(bl);
}
int64_t num_legacy_snapsets; ///< upper bound on pre-luminous-style SnapSets
int64_t num_large_omap_objects = 0;
int64_t num_objects_manifest = 0;
+ int64_t num_omap_bytes = 0;
+ int64_t num_omap_keys = 0;
object_stat_sum_t()
: num_bytes(0),
FLOOR(num_wr_kb);
FLOOR(num_large_omap_objects);
FLOOR(num_objects_manifest);
+ FLOOR(num_omap_bytes);
+ FLOOR(num_omap_keys);
FLOOR(num_shallow_scrub_errors);
FLOOR(num_deep_scrub_errors);
num_scrub_errors = num_shallow_scrub_errors + num_deep_scrub_errors;
SPLIT(num_wr_kb);
SPLIT(num_large_omap_objects);
SPLIT(num_objects_manifest);
+ SPLIT(num_omap_bytes);
+ SPLIT(num_omap_keys);
SPLIT_PRESERVE_NONZERO(num_shallow_scrub_errors);
SPLIT_PRESERVE_NONZERO(num_deep_scrub_errors);
for (unsigned i = 0; i < out.size(); ++i) {
sizeof(num_scrub_errors) +
sizeof(num_large_omap_objects) +
sizeof(num_objects_manifest) +
+ sizeof(num_omap_bytes) +
+ sizeof(num_omap_keys) +
sizeof(num_objects_recovered) +
sizeof(num_bytes_recovered) +
sizeof(num_keys_recovered) +
bool large_omap_object_found:1;
uint64_t large_omap_object_key_count = 0;
uint64_t large_omap_object_value_size = 0;
+ uint64_t object_omap_bytes = 0;
+ uint64_t object_omap_keys = 0;
object() :
// Init invalid size so it won't match if we get a stat EIO error
eversion_t valid_through;
eversion_t incr_since;
bool has_large_omap_object_errors:1;
+ bool has_omap_keys:1;
void merge_incr(const ScrubMap &l);
void clear_from(const hobject_t& start) {
};
WRITE_CLASS_ENCODER(pool_pg_num_history_t)
+// omap specific stats: totals that PGBackend::be_omap_checks() accumulates while walking the scrub maps
+struct omap_stat_t {  // no default member initializers: the scrubber zeroes it explicitly with { 0 }
+ int large_omap_objects;  ///< incremented for each scrub-map entry with large_omap_object_found set
+ int64_t omap_bytes;  ///< running sum of ScrubMap::object::object_omap_bytes over the visited entries
+ int64_t omap_keys;  ///< running sum of ScrubMap::object::object_omap_keys over the visited entries
+};
+
#endif