From: Sage Weil Date: Sat, 13 May 2017 02:27:11 +0000 (-0500) Subject: mon/PGMap: move summary information into parent PGMapDigest object X-Git-Tag: ses5-milestone6~8^2~19^2~101 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8f2521617ee31453909ddd0a6a5fd177c8fbf034;p=ceph.git mon/PGMap: move summary information into parent PGMapDigest object Everything summary-ish that we need to send to the mon is moved into a parent class. The child PGMap retains the detail. The parent gets its own encode(), and PGMap::encode_digest() will call it to encode just the summary info. Squashed in here is a new num_pg_by_osd that could have been done in a preceding patch but I did things in the wrong order. :( Signed-off-by: Sage Weil --- diff --git a/src/mgr/PyModules.cc b/src/mgr/PyModules.cc index 5ba0b69b90e..b9596bb5f97 100644 --- a/src/mgr/PyModules.cc +++ b/src/mgr/PyModules.cc @@ -237,7 +237,7 @@ PyObject *PyModules::get_python(const std::string &what) cluster_state.with_pgmap( [&osd_map, &f](const PGMap &pg_map) { pg_map.dump_fs_stats(nullptr, &f, true); - pg_map.dump_pool_stats(osd_map, nullptr, &f, true); + pg_map.dump_pool_stats_full(osd_map, nullptr, &f, true); }); }); return f.get(); diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index ab8a533c580..da818e2997a 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -100,7 +100,7 @@ public: } void dump_pool_stats(const OSDMap& osdm, stringstream *ss, Formatter *f, bool verbose) const { - parent.dump_pool_stats(osdm, ss, f, verbose); + parent.dump_pool_stats_full(osdm, ss, f, verbose); } int process_pg_command(const string& prefix, diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 08107e6c567..5bcecd97a67 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -14,545 +14,1372 @@ #define dout_context g_ceph_context -// -- +// --------------------- +// PGMapDigest -void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const +void PGMapDigest::encode(bufferlist& bl, uint64_t features) const { - if ((features & CEPH_FEATURE_MONENC) == 0) { - __u8 v = 4; - ::encode(v, bl); - ::encode(version, bl); - ::encode(pg_stat_updates, bl); - ::encode(osd_stat_updates, bl); - ::encode(osd_stat_rm, bl); - ::encode(osdmap_epoch, bl); - ::encode(pg_scan, bl); - ::encode(full_ratio, bl); - ::encode(nearfull_ratio, bl); - ::encode(pg_remove, bl); - return; - } - - ENCODE_START(7, 5, bl); - ::encode(version, bl); - ::encode(pg_stat_updates, bl); - ::encode(osd_stat_updates, bl); - ::encode(osd_stat_rm, bl); - ::encode(osdmap_epoch, bl); - ::encode(pg_scan, bl); - ::encode(full_ratio, bl); - ::encode(nearfull_ratio, bl); - ::encode(pg_remove, bl); - ::encode(stamp, bl); - ::encode(osd_epochs, bl); + // NOTE: see PGMap::encode_digest + ENCODE_START(1, 1, bl); + ::encode(num_pg, bl); + ::encode(num_pg_active, bl); + ::encode(num_osd, bl); + ::encode(osd_stat, bl); + ::encode(pg_pool_sum, bl, features); + ::encode(osd_sum, bl); + ::encode(pg_sum, bl, features); + ::encode(num_pg_by_state, bl); + ::encode(num_pg_by_osd, bl); ENCODE_FINISH(bl); } -void PGMap::Incremental::decode(bufferlist::iterator &bl) +void PGMapDigest::decode(bufferlist::iterator& p) { - DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); - ::decode(version, bl); - if (struct_v < 3) { - pg_stat_updates.clear(); - __u32 n; - ::decode(n, bl); - while (n--) { - old_pg_t opgid; - ::decode(opgid, bl); - pg_t pgid = opgid; - ::decode(pg_stat_updates[pgid], bl); - } - } else { - ::decode(pg_stat_updates, bl); - } - ::decode(osd_stat_updates, bl); - ::decode(osd_stat_rm, bl); - ::decode(osdmap_epoch, bl); - ::decode(pg_scan, bl); - if (struct_v >= 2) { - ::decode(full_ratio, bl); - ::decode(nearfull_ratio, bl); - } - if (struct_v < 3) { - pg_remove.clear(); - __u32 n; - ::decode(n, bl); - while (n--) { - old_pg_t opgid; - ::decode(opgid, bl); - pg_remove.insert(pg_t(opgid)); - } - } else { - ::decode(pg_remove, bl); - } - if (struct_v < 4 && full_ratio == 0) { - full_ratio = -1; - } - if (struct_v < 4 && nearfull_ratio == 0) { - nearfull_ratio = -1; - } - if (struct_v >= 6) - ::decode(stamp, bl); - if (struct_v >= 7) { - ::decode(osd_epochs, bl); - } else { - for (map::iterator i = osd_stat_updates.begin(); - i != osd_stat_updates.end(); - ++i) { - // This isn't accurate, but will cause trimming to behave like - // previously. - osd_epochs.insert(make_pair(i->first, osdmap_epoch)); - } - } - DECODE_FINISH(bl); + DECODE_START(1, p); + ::decode(num_pg, p); + ::decode(num_pg_active, p); + ::decode(num_osd, p); + ::decode(osd_stat, p); + ::decode(pg_pool_sum, p); + ::decode(osd_sum, p); + ::decode(pg_sum, p); + ::decode(num_pg_by_state, p); + ::decode(num_pg_by_osd, p); + DECODE_FINISH(p); } -void PGMap::Incremental::dump(Formatter *f) const +void PGMapDigest::dump(Formatter *f) const { - f->dump_unsigned("version", version); - f->dump_stream("stamp") << stamp; - f->dump_unsigned("osdmap_epoch", osdmap_epoch); - f->dump_unsigned("pg_scan_epoch", pg_scan); - f->dump_float("full_ratio", full_ratio); - f->dump_float("nearfull_ratio", nearfull_ratio); - - f->open_array_section("pg_stat_updates"); - for (map::const_iterator p = pg_stat_updates.begin(); p != pg_stat_updates.end(); ++p) { - f->open_object_section("pg_stat"); - f->dump_stream("pgid") << p->first; - p->second.dump(f); + f->dump_unsigned("num_pg", num_pg); + f->dump_unsigned("num_pg_active", num_pg_active); + f->dump_unsigned("num_osd", num_osd); + f->dump_object("pool_sum", pg_sum); + f->dump_object("osd_sum", osd_sum); + f->open_array_section("pool_stats"); + for (auto& p : pg_pool_sum) { + f->open_object_section("pool_stat"); + f->dump_int("poolid", p.first); + p.second.dump(f); f->close_section(); } f->close_section(); - - f->open_array_section("osd_stat_updates"); - for (map::const_iterator p = osd_stat_updates.begin(); p != osd_stat_updates.end(); ++p) { + f->open_array_section("osd_stats"); + for (auto& p : osd_stat) { f->open_object_section("osd_stat"); - f->dump_int("osd", p->first); - p->second.dump(f); + f->dump_int("osd", p.first); + p.second.dump(f); f->close_section(); } f->close_section(); - - f->open_array_section("osd_stat_removals"); - for (set::const_iterator p = osd_stat_rm.begin(); p != osd_stat_rm.end(); ++p) - f->dump_int("osd", *p); + f->open_array_section("num_pg_by_state"); + for (auto& p : num_pg_by_state) { + f->open_object_section("count"); + f->dump_string("state", pg_state_string(p.first)); + f->dump_unsigned("num", p.second); + f->close_section(); + } f->close_section(); - - f->open_array_section("pg_removals"); - for (set::const_iterator p = pg_remove.begin(); p != pg_remove.end(); ++p) - f->dump_stream("pgid") << *p; + f->open_array_section("num_pg_by_osd"); + for (auto& p : num_pg_by_osd) { + f->open_object_section("count"); + f->dump_unsigned("osd", p.first); + f->dump_unsigned("num_primary_pg", p.second.primary); + f->dump_unsigned("num_acting_pg", p.second.acting); + f->dump_unsigned("num_up_pg", p.second.up); + f->close_section(); + } f->close_section(); } -void PGMap::Incremental::generate_test_instances(list& o) +void PGMapDigest::generate_test_instances(list& ls) { - o.push_back(new Incremental); - o.push_back(new Incremental); - o.back()->version = 1; - o.back()->stamp = utime_t(123,345); - o.push_back(new Incremental); - o.back()->version = 2; - o.back()->pg_stat_updates[pg_t(1,2,3)] = pg_stat_t(); - o.back()->osd_stat_updates[5] = osd_stat_t(); - o.back()->osd_epochs[5] = 12; - o.push_back(new Incremental); - o.back()->version = 3; - o.back()->osdmap_epoch = 1; - o.back()->pg_scan = 2; - o.back()->full_ratio = .2; - o.back()->nearfull_ratio = .3; - o.back()->pg_stat_updates[pg_t(4,5,6)] = pg_stat_t(); - o.back()->osd_stat_updates[6] = osd_stat_t(); - o.back()->osd_epochs[6] = 12; - o.back()->pg_remove.insert(pg_t(1,2,3)); - o.back()->osd_stat_rm.insert(5); + ls.push_back(new PGMapDigest); } +inline std::string percentify(const float& a) { + std::stringstream ss; + if (a < 0.01) + ss << "0"; + else + ss << std::fixed << std::setprecision(2) << a; + return ss.str(); +} -// -- - -void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) +void PGMapDigest::print_summary(Formatter *f, ostream *out) const { - assert(inc.version == version+1); - version++; - - utime_t delta_t; - delta_t = inc.stamp; - delta_t -= stamp; - stamp = inc.stamp; - - pool_stat_t pg_sum_old = pg_sum; - ceph::unordered_map pg_pool_sum_old; + if (f) + f->open_array_section("pgs_by_state"); - bool ratios_changed = false; - if (inc.full_ratio != full_ratio && inc.full_ratio != -1) { - full_ratio = inc.full_ratio; - ratios_changed = true; + // list is descending numeric order (by count) + multimap state_by_count; // count -> state + for (ceph::unordered_map::const_iterator p = num_pg_by_state.begin(); + p != num_pg_by_state.end(); + ++p) { + state_by_count.insert(make_pair(p->second, p->first)); } - if (inc.nearfull_ratio != nearfull_ratio && inc.nearfull_ratio != -1) { - nearfull_ratio = inc.nearfull_ratio; - ratios_changed = true; + if (f) { + for (multimap::reverse_iterator p = state_by_count.rbegin(); + p != state_by_count.rend(); + ++p) + { + f->open_object_section("pgs_by_state_element"); + f->dump_string("state_name", pg_state_string(p->second)); + f->dump_unsigned("count", p->first); + f->close_section(); + } } - if (ratios_changed) - redo_full_sets(); - - for (map::const_iterator p = inc.pg_stat_updates.begin(); - p != inc.pg_stat_updates.end(); - ++p) { - const pg_t &update_pg(p->first); - const pg_stat_t &update_stat(p->second); + if (f) + f->close_section(); - if (pg_pool_sum_old.count(update_pg.pool()) == 0) - pg_pool_sum_old[update_pg.pool()] = pg_pool_sum[update_pg.pool()]; + if (f) { + f->dump_unsigned("num_pgs", num_pg); + f->dump_unsigned("num_pools", pg_pool_sum.size()); + f->dump_unsigned("num_objects", pg_sum.stats.sum.num_objects); + f->dump_unsigned("data_bytes", pg_sum.stats.sum.num_bytes); + f->dump_unsigned("bytes_used", osd_sum.kb_used * 1024ull); + f->dump_unsigned("bytes_avail", osd_sum.kb_avail * 1024ull); + f->dump_unsigned("bytes_total", osd_sum.kb * 1024ull); + } else { + *out << " pools: " << pg_pool_sum.size() << " pools, " + << num_pg << " pgs\n"; + *out << " objects: " << si_t(pg_sum.stats.sum.num_objects) << " objects, " + << prettybyte_t(pg_sum.stats.sum.num_bytes) << "\n"; + *out << " usage: " + << kb_t(osd_sum.kb_used) << " used, " + << kb_t(osd_sum.kb_avail) << " / " + << kb_t(osd_sum.kb) << " avail\n"; + *out << " pgs: "; + } - ceph::unordered_map::iterator t = pg_stat.find(update_pg); - if (t == pg_stat.end()) { - ceph::unordered_map::value_type v(update_pg, update_stat); - pg_stat.insert(v); + bool pad = false; + if (num_pg_active < num_pg) { + float p = (float)num_pg_active / (float)num_pg; + if (f) { + f->dump_float("active_pgs_ratio", p); } else { - stat_pg_sub(update_pg, t->second); - t->second = update_stat; + char b[20]; + snprintf(b, sizeof(b), "%.3f", (1.0 - p) * 100.0); + *out << b << "% pgs inactive\n"; + pad = true; } - stat_pg_add(update_pg, update_stat); } - assert(osd_stat.size() == osd_epochs.size()); - for (map::const_iterator p = - inc.get_osd_stat_updates().begin(); - p != inc.get_osd_stat_updates().end(); - ++p) { - int osd = p->first; - const osd_stat_t &new_stats(p->second); - ceph::unordered_map::iterator t = osd_stat.find(osd); - if (t == osd_stat.end()) { - ceph::unordered_map::value_type v(osd, new_stats); - osd_stat.insert(v); - } else { - stat_osd_sub(t->second); - t->second = new_stats; + list sl; + overall_recovery_summary(f, &sl); + if (!f && !sl.empty()) { + for (list::iterator p = sl.begin(); p != sl.end(); ++p) { + if (pad) { + *out << " "; + } + *out << *p << "\n"; + pad = true; } - ceph::unordered_map::iterator i = osd_epochs.find(osd); - map::const_iterator j = inc.get_osd_epochs().find(osd); - assert(j != inc.get_osd_epochs().end()); - - if (i == osd_epochs.end()) - osd_epochs.insert(*j); - else - i->second = j->second; - - stat_osd_add(new_stats); - - // adjust [near]full status - register_nearfull_status(osd, new_stats); - } - set deleted_pools; - for (set::const_iterator p = inc.pg_remove.begin(); - p != inc.pg_remove.end(); - ++p) { - const pg_t &removed_pg(*p); - ceph::unordered_map::iterator s = pg_stat.find(removed_pg); - if (s != pg_stat.end()) { - stat_pg_sub(removed_pg, s->second); - pg_stat.erase(s); - } - if (removed_pg.ps() == 0) - deleted_pools.insert(removed_pg.pool()); - } - for (set::iterator p = deleted_pools.begin(); - p != deleted_pools.end(); - ++p) { - dout(20) << " deleted pool " << *p << dendl; - deleted_pool(*p); } + sl.clear(); - for (set::iterator p = inc.get_osd_stat_rm().begin(); - p != inc.get_osd_stat_rm().end(); - ++p) { - ceph::unordered_map::iterator t = osd_stat.find(*p); - if (t != osd_stat.end()) { - stat_osd_sub(t->second); - osd_stat.erase(t); + if (!f) { + unsigned max_width = 1; + for (multimap::reverse_iterator p = state_by_count.rbegin(); + p != state_by_count.rend(); + ++p) + { + std::stringstream ss; + ss << p->first; + max_width = MAX(ss.str().size(), max_width); } - // remove these old osds from full/nearfull set(s), too - nearfull_osds.erase(*p); - full_osds.erase(*p); - } - - // calculate a delta, and average over the last 2 deltas. - pool_stat_t d = pg_sum; - d.stats.sub(pg_sum_old.stats); - pg_sum_deltas.push_back(make_pair(d, delta_t)); - stamp_delta += delta_t; - - pg_sum_delta.stats.add(d.stats); - if (pg_sum_deltas.size() > (std::list< pair >::size_type)MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) { - pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats); - stamp_delta -= pg_sum_deltas.front().second; - pg_sum_deltas.pop_front(); + for (multimap::reverse_iterator p = state_by_count.rbegin(); + p != state_by_count.rend(); + ++p) + { + if (pad) { + *out << " "; + } + pad = true; + out->setf(std::ios::left); + *out << std::setw(max_width) << p->first + << " " << pg_state_string(p->second) << "\n"; + out->unsetf(std::ios::left); + } } - update_pool_deltas(cct, inc.stamp, pg_pool_sum_old); - - if (inc.osdmap_epoch) - last_osdmap_epoch = inc.osdmap_epoch; - if (inc.pg_scan) - last_pg_scan = inc.pg_scan; - - min_last_epoch_clean = 0; // invalidate -} + ostringstream ss_rec_io; + overall_recovery_rate_summary(f, &ss_rec_io); + ostringstream ss_client_io; + overall_client_io_rate_summary(f, &ss_client_io); + ostringstream ss_cache_io; + overall_cache_io_rate_summary(f, &ss_cache_io); -void PGMap::redo_full_sets() -{ - full_osds.clear(); - nearfull_osds.clear(); - for (ceph::unordered_map::iterator i = osd_stat.begin(); - i != osd_stat.end(); - ++i) { - register_nearfull_status(i->first, i->second); + if (!f && (ss_client_io.str().length() || ss_rec_io.str().length() + || ss_cache_io.str().length())) { + *out << "\n \n"; + *out << " io:\n"; } -} - -void PGMap::register_nearfull_status(int osd, const osd_stat_t& s) -{ - float ratio = ((float)s.kb_used) / ((float)s.kb); - if (full_ratio > 0 && ratio > full_ratio) { - // full - full_osds.insert(osd); - nearfull_osds.erase(osd); - } else if (nearfull_ratio > 0 && ratio > nearfull_ratio) { - // nearfull - full_osds.erase(osd); - nearfull_osds.insert(osd); - } else { - // ok - full_osds.erase(osd); - nearfull_osds.erase(osd); - } + if (!f && ss_client_io.str().length()) + *out << " client: " << ss_client_io.str() << "\n"; + if (!f && ss_rec_io.str().length()) + *out << " recovery: " << ss_rec_io.str() << "\n"; + if (!f && ss_cache_io.str().length()) + *out << " cache: " << ss_cache_io.str() << "\n"; } -void PGMap::calc_stats() +void PGMapDigest::print_oneline_summary(Formatter *f, ostream *out) const { - num_pg_by_state.clear(); - num_pg = 0; - num_pg_active = 0; - num_osd = 0; - pg_pool_sum.clear(); - pg_sum = pool_stat_t(); - osd_sum = osd_stat_t(); - pg_by_osd.clear(); - num_primary_pg_by_osd.clear(); + std::stringstream ss; - for (ceph::unordered_map::iterator p = pg_stat.begin(); - p != pg_stat.end(); + if (f) + f->open_array_section("num_pg_by_state"); + for (ceph::unordered_map::const_iterator p = num_pg_by_state.begin(); + p != num_pg_by_state.end(); ++p) { - stat_pg_add(p->first, p->second); + if (f) { + f->open_object_section("state"); + f->dump_string("name", pg_state_string(p->first)); + f->dump_unsigned("num", p->second); + f->close_section(); + } + if (p != num_pg_by_state.begin()) + ss << ", "; + ss << p->second << " " << pg_state_string(p->first); } - for (ceph::unordered_map::iterator p = osd_stat.begin(); - p != osd_stat.end(); - ++p) - stat_osd_add(p->second); - - redo_full_sets(); - - min_last_epoch_clean = calc_min_last_epoch_clean(); -} + if (f) + f->close_section(); -void PGMap::update_pg(pg_t pgid, bufferlist& bl) -{ - bufferlist::iterator p = bl.begin(); - ceph::unordered_map::iterator s = pg_stat.find(pgid); - epoch_t old_lec = 0, lec; - if (s != pg_stat.end()) { - old_lec = s->second.get_effective_last_epoch_clean(); - stat_pg_update(pgid, s->second, p); - lec = s->second.get_effective_last_epoch_clean(); - } else { - pg_stat_t& r = pg_stat[pgid]; - ::decode(r, p); - stat_pg_add(pgid, r); - lec = r.get_effective_last_epoch_clean(); + string states = ss.str(); + if (out) + *out << num_pg << " pgs: " + << states << "; " + << prettybyte_t(pg_sum.stats.sum.num_bytes) << " data, " + << kb_t(osd_sum.kb_used) << " used, " + << kb_t(osd_sum.kb_avail) << " / " + << kb_t(osd_sum.kb) << " avail"; + if (f) { + f->dump_unsigned("num_pgs", num_pg); + f->dump_unsigned("num_bytes", pg_sum.stats.sum.num_bytes); + f->dump_unsigned("raw_bytes_used", osd_sum.kb_used << 10); + f->dump_unsigned("raw_bytes_avail", osd_sum.kb_avail << 10); + f->dump_unsigned("raw_bytes", osd_sum.kb << 10); } - if (min_last_epoch_clean && - (lec < min_last_epoch_clean || // we did - (lec > min_last_epoch_clean && // we might - old_lec == min_last_epoch_clean) - )) - min_last_epoch_clean = 0; -} - -void PGMap::remove_pg(pg_t pgid) -{ - ceph::unordered_map::iterator s = pg_stat.find(pgid); - if (s != pg_stat.end()) { - if (min_last_epoch_clean && - s->second.get_effective_last_epoch_clean() == min_last_epoch_clean) - min_last_epoch_clean = 0; - stat_pg_sub(pgid, s->second); - pg_stat.erase(s); + // make non-negative; we can get negative values if osds send + // uncommitted stats and then "go backward" or if they are just + // buggy/wrong. + pool_stat_t pos_delta = pg_sum_delta; + pos_delta.floor(0); + if (pos_delta.stats.sum.num_rd || + pos_delta.stats.sum.num_wr) { + if (out) + *out << "; "; + if (pos_delta.stats.sum.num_rd) { + int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta; + if (out) + *out << pretty_si_t(rd) << "B/s rd, "; + if (f) + f->dump_unsigned("read_bytes_sec", rd); + } + if (pos_delta.stats.sum.num_wr) { + int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta; + if (out) + *out << pretty_si_t(wr) << "B/s wr, "; + if (f) + f->dump_unsigned("write_bytes_sec", wr); + } + int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta; + if (out) + *out << pretty_si_t(iops) << "op/s"; + if (f) + f->dump_unsigned("io_sec", iops); } + + list sl; + overall_recovery_summary(f, &sl); + if (out) + for (list::iterator p = sl.begin(); p != sl.end(); ++p) + *out << "; " << *p; + std::stringstream ssr; + overall_recovery_rate_summary(f, &ssr); + if (out && ssr.str().length()) + *out << "; " << ssr.str() << " recovering"; } -void PGMap::update_osd(int osd, bufferlist& bl) +void PGMapDigest::recovery_summary(Formatter *f, list *psl, + const pool_stat_t& delta_sum) const { - bufferlist::iterator p = bl.begin(); - ceph::unordered_map::iterator o = osd_stat.find(osd); - epoch_t old_lec = 0; - if (o != osd_stat.end()) { - ceph::unordered_map::iterator i = osd_epochs.find(osd); - if (i != osd_epochs.end()) - old_lec = i->second; - stat_osd_sub(o->second); + if (delta_sum.stats.sum.num_objects_degraded && delta_sum.stats.sum.num_object_copies > 0) { + double pc = (double)delta_sum.stats.sum.num_objects_degraded / + (double)delta_sum.stats.sum.num_object_copies * (double)100.0; + char b[20]; + snprintf(b, sizeof(b), "%.3lf", pc); + if (f) { + f->dump_unsigned("degraded_objects", delta_sum.stats.sum.num_objects_degraded); + f->dump_unsigned("degraded_total", delta_sum.stats.sum.num_object_copies); + f->dump_float("degraded_ratio", pc / 100.0); + } else { + ostringstream ss; + ss << delta_sum.stats.sum.num_objects_degraded + << "/" << delta_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)"; + psl->push_back(ss.str()); + } } - osd_stat_t& r = osd_stat[osd]; - ::decode(r, p); - stat_osd_add(r); - - // adjust [near]full status - register_nearfull_status(osd, r); - - // epoch? - if (!p.end()) { - epoch_t e; - ::decode(e, p); - - if (e < min_last_epoch_clean || - (e > min_last_epoch_clean && - old_lec == min_last_epoch_clean)) - min_last_epoch_clean = 0; - } else { - // WARNING: we are not refreshing min_last_epoch_clean! must be old store - // or old mon running. + if (delta_sum.stats.sum.num_objects_misplaced && delta_sum.stats.sum.num_object_copies > 0) { + double pc = (double)delta_sum.stats.sum.num_objects_misplaced / + (double)delta_sum.stats.sum.num_object_copies * (double)100.0; + char b[20]; + snprintf(b, sizeof(b), "%.3lf", pc); + if (f) { + f->dump_unsigned("misplaced_objects", delta_sum.stats.sum.num_objects_misplaced); + f->dump_unsigned("misplaced_total", delta_sum.stats.sum.num_object_copies); + f->dump_float("misplaced_ratio", pc / 100.0); + } else { + ostringstream ss; + ss << delta_sum.stats.sum.num_objects_misplaced + << "/" << delta_sum.stats.sum.num_object_copies << " objects misplaced (" << b << "%)"; + psl->push_back(ss.str()); + } + } + if (delta_sum.stats.sum.num_objects_unfound && delta_sum.stats.sum.num_objects) { + double pc = (double)delta_sum.stats.sum.num_objects_unfound / + (double)delta_sum.stats.sum.num_objects * (double)100.0; + char b[20]; + snprintf(b, sizeof(b), "%.3lf", pc); + if (f) { + f->dump_unsigned("unfound_objects", delta_sum.stats.sum.num_objects_unfound); + f->dump_unsigned("unfound_total", delta_sum.stats.sum.num_objects); + f->dump_float("unfound_ratio", pc / 100.0); + } else { + ostringstream ss; + ss << delta_sum.stats.sum.num_objects_unfound + << "/" << delta_sum.stats.sum.num_objects << " unfound (" << b << "%)"; + psl->push_back(ss.str()); + } } } -void PGMap::remove_osd(int osd) +void PGMapDigest::recovery_rate_summary(Formatter *f, ostream *out, + const pool_stat_t& delta_sum, + utime_t delta_stamp) const { - ceph::unordered_map::iterator o = osd_stat.find(osd); - if (o != osd_stat.end()) { - stat_osd_sub(o->second); - osd_stat.erase(o); - - // remove these old osds from full/nearfull set(s), too - nearfull_osds.erase(osd); - full_osds.erase(osd); + // make non-negative; we can get negative values if osds send + // uncommitted stats and then "go backward" or if they are just + // buggy/wrong. + pool_stat_t pos_delta = delta_sum; + pos_delta.floor(0); + if (pos_delta.stats.sum.num_objects_recovered || + pos_delta.stats.sum.num_bytes_recovered || + pos_delta.stats.sum.num_keys_recovered) { + int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)delta_stamp; + int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)delta_stamp; + int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)delta_stamp; + if (f) { + f->dump_int("recovering_objects_per_sec", objps); + f->dump_int("recovering_bytes_per_sec", bps); + f->dump_int("recovering_keys_per_sec", kps); + f->dump_int("num_objects_recovered", pos_delta.stats.sum.num_objects_recovered); + f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered); + f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered); + } else { + *out << pretty_si_t(bps) << "B/s"; + if (pos_delta.stats.sum.num_keys_recovered) + *out << ", " << pretty_si_t(kps) << "keys/s"; + *out << ", " << pretty_si_t(objps) << "objects/s"; + } } } -void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, - bool sameosds) +void PGMapDigest::overall_recovery_rate_summary(Formatter *f, ostream *out) const { - pg_pool_sum[pgid.pool()].add(s); - pg_sum.add(s); - - num_pg++; - num_pg_by_state[s.state]++; - - if ((s.state & PG_STATE_CREATING) && - s.parent_split_bits == 0) { - creating_pgs.insert(pgid); - if (s.acting_primary >= 0) { - creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid); - } - } + recovery_rate_summary(f, out, pg_sum_delta, stamp_delta); +} - if (s.state & PG_STATE_ACTIVE) { - ++num_pg_active; - } +void PGMapDigest::overall_recovery_summary(Formatter *f, list *psl) const +{ + recovery_summary(f, psl, pg_sum); +} - if (sameosds) +void PGMapDigest::pool_recovery_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const +{ + ceph::unordered_map >::const_iterator p = + per_pool_sum_delta.find(poolid); + if (p == per_pool_sum_delta.end()) return; - for (vector::const_iterator p = s.blocked_by.begin(); - p != s.blocked_by.end(); - ++p) { - ++blocked_by_sum[*p]; - } - - for (vector::const_iterator p = s.acting.begin(); p != s.acting.end(); ++p) - pg_by_osd[*p].insert(pgid); - for (vector::const_iterator p = s.up.begin(); p != s.up.end(); ++p) - pg_by_osd[*p].insert(pgid); - - if (s.up_primary >= 0) - num_primary_pg_by_osd[s.up_primary]++; + ceph::unordered_map::const_iterator ts = + per_pool_sum_deltas_stamps.find(p->first); + assert(ts != per_pool_sum_deltas_stamps.end()); + recovery_rate_summary(f, out, p->second.first, ts->second); } -void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, - bool sameosds) +void PGMapDigest::pool_recovery_summary(Formatter *f, list *psl, + uint64_t poolid) const { - pool_stat_t& ps = pg_pool_sum[pgid.pool()]; - ps.sub(s); - if (ps.is_zero()) - pg_pool_sum.erase(pgid.pool()); - pg_sum.sub(s); + ceph::unordered_map >::const_iterator p = + per_pool_sum_delta.find(poolid); + if (p == per_pool_sum_delta.end()) + return; - num_pg--; - int end = --num_pg_by_state[s.state]; - assert(end >= 0); - if (end == 0) - num_pg_by_state.erase(s.state); + recovery_summary(f, psl, p->second.first); +} - if ((s.state & PG_STATE_CREATING) && - s.parent_split_bits == 0) { - creating_pgs.erase(pgid); - if (s.acting_primary >= 0) { - map >& r = creating_pgs_by_osd_epoch[s.acting_primary]; - r[s.mapping_epoch].erase(pgid); - if (r[s.mapping_epoch].empty()) - r.erase(s.mapping_epoch); - if (r.empty()) - creating_pgs_by_osd_epoch.erase(s.acting_primary); +void PGMapDigest::client_io_rate_summary(Formatter *f, ostream *out, + const pool_stat_t& delta_sum, + utime_t delta_stamp) const +{ + pool_stat_t pos_delta = delta_sum; + pos_delta.floor(0); + if (pos_delta.stats.sum.num_rd || + pos_delta.stats.sum.num_wr) { + if (pos_delta.stats.sum.num_rd) { + int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)delta_stamp; + if (f) { + f->dump_int("read_bytes_sec", rd); + } else { + *out << pretty_si_t(rd) << "B/s rd, "; + } + } + if (pos_delta.stats.sum.num_wr) { + int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)delta_stamp; + if (f) { + f->dump_int("write_bytes_sec", wr); + } else { + *out << pretty_si_t(wr) << "B/s wr, "; + } + } + int64_t iops_rd = pos_delta.stats.sum.num_rd / (double)delta_stamp; + int64_t iops_wr = pos_delta.stats.sum.num_wr / (double)delta_stamp; + if (f) { + f->dump_int("read_op_per_sec", iops_rd); + f->dump_int("write_op_per_sec", iops_wr); + } else { + *out << pretty_si_t(iops_rd) << "op/s rd, " << pretty_si_t(iops_wr) << "op/s wr"; } } +} - if (s.state & PG_STATE_ACTIVE) { - --num_pg_active; - } +void PGMapDigest::overall_client_io_rate_summary(Formatter *f, ostream *out) const +{ + client_io_rate_summary(f, out, pg_sum_delta, stamp_delta); +} - if (sameosds) +void PGMapDigest::pool_client_io_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const +{ + ceph::unordered_map >::const_iterator p = + per_pool_sum_delta.find(poolid); + if (p == per_pool_sum_delta.end()) return; - for (vector::const_iterator p = s.blocked_by.begin(); - p != s.blocked_by.end(); - ++p) { - ceph::unordered_map::iterator q = blocked_by_sum.find(*p); - assert(q != blocked_by_sum.end()); - --q->second; - if (q->second == 0) - blocked_by_sum.erase(q); - } - - for (vector::const_iterator p = s.acting.begin(); p != s.acting.end(); ++p) { - set& oset = pg_by_osd[*p]; - oset.erase(pgid); - if (oset.empty()) - pg_by_osd.erase(*p); - } - for (vector::const_iterator p = s.up.begin(); p != s.up.end(); ++p) { - set& oset = pg_by_osd[*p]; - oset.erase(pgid); - if (oset.empty()) - pg_by_osd.erase(*p); - } - - if (s.up_primary >= 0) { - auto it = num_primary_pg_by_osd.find(s.up_primary); - if (it != num_primary_pg_by_osd.end() && it->second > 0) - it->second--; - } + ceph::unordered_map::const_iterator ts = + per_pool_sum_deltas_stamps.find(p->first); + assert(ts != per_pool_sum_deltas_stamps.end()); + client_io_rate_summary(f, out, p->second.first, ts->second); } -void PGMap::stat_pg_update(const pg_t pgid, pg_stat_t& s, - bufferlist::iterator& blp) +void PGMapDigest::cache_io_rate_summary(Formatter *f, ostream *out, + const pool_stat_t& delta_sum, + utime_t delta_stamp) const { - pg_stat_t n; - ::decode(n, blp); + pool_stat_t pos_delta = delta_sum; + pos_delta.floor(0); + bool have_output = false; - bool sameosds = + if (pos_delta.stats.sum.num_flush) { + int64_t flush = (pos_delta.stats.sum.num_flush_kb << 10) / (double)delta_stamp; + if (f) { + f->dump_int("flush_bytes_sec", flush); + } else { + *out << pretty_si_t(flush) << "B/s flush"; + have_output = true; + } + } + if (pos_delta.stats.sum.num_evict) { + int64_t evict = (pos_delta.stats.sum.num_evict_kb << 10) / (double)delta_stamp; + if (f) { + f->dump_int("evict_bytes_sec", evict); + } else { + if (have_output) + *out << ", "; + *out << pretty_si_t(evict) << "B/s evict"; + have_output = true; + } + } + if (pos_delta.stats.sum.num_promote) { + int64_t promote = pos_delta.stats.sum.num_promote / (double)delta_stamp; + if (f) { + f->dump_int("promote_op_per_sec", promote); + } else { + if (have_output) + *out << ", "; + *out << pretty_si_t(promote) << "op/s promote"; + have_output = true; + } + } + if (pos_delta.stats.sum.num_flush_mode_low) { + if (f) { + f->dump_int("num_flush_mode_low", pos_delta.stats.sum.num_flush_mode_low); + } else { + if (have_output) + *out << ", "; + *out << pretty_si_t(pos_delta.stats.sum.num_flush_mode_low) << "PG(s) flushing"; + have_output = true; + } + } + if (pos_delta.stats.sum.num_flush_mode_high) { + if (f) { + f->dump_int("num_flush_mode_high", pos_delta.stats.sum.num_flush_mode_high); + } else { + if (have_output) + *out << ", "; + *out << pretty_si_t(pos_delta.stats.sum.num_flush_mode_high) << "PG(s) flushing (high)"; + have_output = true; + } + } + if (pos_delta.stats.sum.num_evict_mode_some) { + if (f) { + f->dump_int("num_evict_mode_some", pos_delta.stats.sum.num_evict_mode_some); + } else { + if (have_output) + *out << ", "; + *out << pretty_si_t(pos_delta.stats.sum.num_evict_mode_some) << "PG(s) evicting"; + have_output = true; + } + } + if (pos_delta.stats.sum.num_evict_mode_full) { + if (f) { + f->dump_int("num_evict_mode_full", pos_delta.stats.sum.num_evict_mode_full); + } else { + if (have_output) + *out << ", "; + *out << pretty_si_t(pos_delta.stats.sum.num_evict_mode_full) << "PG(s) evicting (full)"; + } + } +} + +void PGMapDigest::overall_cache_io_rate_summary(Formatter *f, ostream *out) const +{ + cache_io_rate_summary(f, out, pg_sum_delta, stamp_delta); +} + +void PGMapDigest::pool_cache_io_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const +{ + ceph::unordered_map >::const_iterator p = + per_pool_sum_delta.find(poolid); + if (p == per_pool_sum_delta.end()) + return; + + ceph::unordered_map::const_iterator ts = + per_pool_sum_deltas_stamps.find(p->first); + assert(ts != per_pool_sum_deltas_stamps.end()); + cache_io_rate_summary(f, out, p->second.first, ts->second); +} + +void PGMapDigest::dump_pool_stats_full( + const OSDMap &osd_map, + stringstream *ss, + Formatter *f, + bool verbose) const +{ + TextTable tbl; + + if (f) { + f->open_array_section("pools"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT); + if (verbose) { + tbl.define_column("QUOTA OBJECTS", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("QUOTA BYTES", TextTable::LEFT, TextTable::LEFT); + } + + tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("%USED", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("MAX AVAIL", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); + if (verbose) { + tbl.define_column("DIRTY", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("READ", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("WRITE", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT); + } + } + + map avail_by_rule; + for (map::const_iterator p = osd_map.get_pools().begin(); + p != osd_map.get_pools().end(); ++p) { + int64_t pool_id = p->first; + if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0)) + continue; + const string& pool_name = osd_map.get_pool_name(pool_id); + const pool_stat_t &stat = pg_pool_sum.at(pool_id); + + const pg_pool_t *pool = osd_map.get_pg_pool(pool_id); + int ruleno = osd_map.crush->find_rule(pool->get_crush_ruleset(), + pool->get_type(), + pool->get_size()); + int64_t avail; + float raw_used_rate; + if (avail_by_rule.count(ruleno) == 0) { + avail = get_rule_avail(osd_map, ruleno); + if (avail < 0) + avail = 0; + avail_by_rule[ruleno] = avail; + } else { + avail = avail_by_rule[ruleno]; + } + switch (pool->get_type()) { + case pg_pool_t::TYPE_REPLICATED: + avail /= pool->get_size(); + raw_used_rate = pool->get_size(); + break; + case pg_pool_t::TYPE_ERASURE: + { + auto& ecp = + osd_map.get_erasure_code_profile(pool->erasure_code_profile); + auto pm = ecp.find("m"); + auto pk = ecp.find("k"); + if (pm != ecp.end() && pk != ecp.end()) { + int k = atoi(pk->second.c_str()); + int m = atoi(pm->second.c_str()); + avail = avail * k / (m + k); + raw_used_rate = (float)(m + k) / k; + } else { + raw_used_rate = 0.0; + } + } + break; + default: + assert(0 == "unrecognized pool type"); + } + + if (f) { + f->open_object_section("pool"); + f->dump_string("name", pool_name); + f->dump_int("id", pool_id); + f->open_object_section("stats"); + } else { + tbl << pool_name + << pool_id; + if (verbose) { + if (pool->quota_max_objects == 0) + tbl << "N/A"; + else + tbl << si_t(pool->quota_max_objects); + + if (pool->quota_max_bytes == 0) + tbl << "N/A"; + else + tbl << si_t(pool->quota_max_bytes); + } + + } + dump_object_stat_sum(tbl, f, stat.stats.sum, avail, raw_used_rate, verbose, pool); + if (f) + f->close_section(); // stats + else + tbl << TextTable::endrow; + + if (f) + f->close_section(); // pool + } + if (f) + f->close_section(); + else { + assert(ss != nullptr); + *ss << "POOLS:\n"; + tbl.set_indent(4); + *ss << tbl; + } +} + +void PGMapDigest::dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const +{ + if (f) { + f->open_object_section("stats"); + f->dump_int("total_bytes", osd_sum.kb * 1024ull); + f->dump_int("total_used_bytes", osd_sum.kb_used * 1024ull); + f->dump_int("total_avail_bytes", osd_sum.kb_avail * 1024ull); + if (verbose) { + f->dump_int("total_objects", pg_sum.stats.sum.num_objects); + } + f->close_section(); + } else { + assert(ss != nullptr); + TextTable tbl; + tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("%RAW USED", TextTable::LEFT, TextTable::RIGHT); + if (verbose) { + tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); + } + tbl << stringify(si_t(osd_sum.kb*1024)) + << stringify(si_t(osd_sum.kb_avail*1024)) + << stringify(si_t(osd_sum.kb_used*1024)); + float used = 0.0; + if (osd_sum.kb > 0) { + used = ((float)osd_sum.kb_used / osd_sum.kb); + } + tbl << percentify(used*100); + if (verbose) { + tbl << stringify(si_t(pg_sum.stats.sum.num_objects)); + } + tbl << TextTable::endrow; + + *ss << "GLOBAL:\n"; + tbl.set_indent(4); + *ss << tbl; + } +} + +void PGMapDigest::dump_object_stat_sum( + TextTable &tbl, Formatter *f, + const object_stat_sum_t &sum, uint64_t avail, + float raw_used_rate, bool verbose, + const pg_pool_t *pool) +{ + float curr_object_copies_rate = 0.0; + if (sum.num_object_copies > 0) + curr_object_copies_rate = (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies; + + if (f) { + f->dump_int("kb_used", SHIFT_ROUND_UP(sum.num_bytes, 10)); + f->dump_int("bytes_used", sum.num_bytes); + f->dump_unsigned("max_avail", avail); + f->dump_int("objects", sum.num_objects); + if (verbose) { + f->dump_int("quota_objects", pool->quota_max_objects); + f->dump_int("quota_bytes", pool->quota_max_bytes); + f->dump_int("dirty", sum.num_objects_dirty); + f->dump_int("rd", sum.num_rd); + f->dump_int("rd_bytes", sum.num_rd_kb * 1024ull); + f->dump_int("wr", sum.num_wr); + f->dump_int("wr_bytes", sum.num_wr_kb * 1024ull); + f->dump_int("raw_bytes_used", sum.num_bytes * raw_used_rate * curr_object_copies_rate); + } + } else { + tbl << stringify(si_t(sum.num_bytes)); + float used = 0.0; + if (avail) { + used = sum.num_bytes * curr_object_copies_rate; + used /= used + avail; + } else if (sum.num_bytes) { + used = 1.0; + } + tbl << percentify(used*100); + tbl << si_t(avail); + tbl << sum.num_objects; + if (verbose) { + tbl << stringify(si_t(sum.num_objects_dirty)) + << stringify(si_t(sum.num_rd)) + << stringify(si_t(sum.num_wr)) + << stringify(si_t(sum.num_bytes * raw_used_rate * curr_object_copies_rate)); + } + } +} + +int64_t PGMapDigest::get_rule_avail(const OSDMap& osdmap, int ruleno) const +{ + map wm; + int r = osdmap.crush->get_rule_weight_osd_map(ruleno, &wm); + if (r < 0) { + return r; + } + if (wm.empty()) { + return 0; + } + + float fratio; + if (osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS && + osdmap.get_full_ratio() > 0) { + fratio = osdmap.get_full_ratio(); + } else { + fratio = get_fallback_full_ratio(); + } + + int64_t min = -1; + for (map::iterator p = wm.begin(); p != wm.end(); ++p) { + ceph::unordered_map::const_iterator osd_info = + osd_stat.find(p->first); + if (osd_info != osd_stat.end()) { + if (osd_info->second.kb == 0 || p->second == 0) { + // osd must be out, hence its stats have been zeroed + // (unless we somehow managed to have a disk with size 0...) + // + // (p->second == 0), if osd weight is 0, no need to + // calculate proj below. + continue; + } + double unusable = (double)osd_info->second.kb * + (1.0 - fratio); + double avail = MAX(0.0, (double)osd_info->second.kb_avail - unusable); + avail *= 1024.0; + int64_t proj = (int64_t)(avail / (double)p->second); + if (min < 0 || proj < min) { + min = proj; + } + } else { + dout(0) << "Cannot get stat of OSD " << p->first << dendl; + } + } + return min; +} + + + +// --------------------- +// PGMap + +void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const +{ + if ((features & CEPH_FEATURE_MONENC) == 0) { + __u8 v = 4; + ::encode(v, bl); + ::encode(version, bl); + ::encode(pg_stat_updates, bl); + ::encode(osd_stat_updates, bl); + ::encode(osd_stat_rm, bl); + ::encode(osdmap_epoch, bl); + ::encode(pg_scan, bl); + ::encode(full_ratio, bl); + ::encode(nearfull_ratio, bl); + ::encode(pg_remove, bl); + return; + } + + ENCODE_START(7, 5, bl); + ::encode(version, bl); + ::encode(pg_stat_updates, bl); + ::encode(osd_stat_updates, bl); + ::encode(osd_stat_rm, bl); + ::encode(osdmap_epoch, bl); + ::encode(pg_scan, bl); + ::encode(full_ratio, bl); + ::encode(nearfull_ratio, bl); + ::encode(pg_remove, bl); + ::encode(stamp, bl); + ::encode(osd_epochs, bl); + ENCODE_FINISH(bl); +} + +void PGMap::Incremental::decode(bufferlist::iterator &bl) +{ + DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); + ::decode(version, bl); + if (struct_v < 3) { + pg_stat_updates.clear(); + __u32 n; + ::decode(n, bl); + while (n--) { + old_pg_t opgid; + ::decode(opgid, bl); + pg_t pgid = opgid; + ::decode(pg_stat_updates[pgid], bl); + } + } else { + ::decode(pg_stat_updates, bl); + } + ::decode(osd_stat_updates, bl); + ::decode(osd_stat_rm, bl); + ::decode(osdmap_epoch, bl); + ::decode(pg_scan, bl); + if (struct_v >= 2) { + ::decode(full_ratio, bl); + ::decode(nearfull_ratio, bl); + } + if (struct_v < 3) { + pg_remove.clear(); + __u32 n; + ::decode(n, bl); + while (n--) { + old_pg_t opgid; + ::decode(opgid, bl); + pg_remove.insert(pg_t(opgid)); + } + } else { + ::decode(pg_remove, bl); + } + if (struct_v < 4 && full_ratio == 0) { + full_ratio = -1; + } + if (struct_v < 4 && nearfull_ratio == 0) { + nearfull_ratio = -1; + } + if (struct_v >= 6) + ::decode(stamp, bl); + if (struct_v >= 7) { + ::decode(osd_epochs, bl); + } else { + for (map::iterator i = osd_stat_updates.begin(); + i != osd_stat_updates.end(); + ++i) { + // This isn't accurate, but will cause trimming to behave like + // previously. + osd_epochs.insert(make_pair(i->first, osdmap_epoch)); + } + } + DECODE_FINISH(bl); +} + +void PGMap::Incremental::dump(Formatter *f) const +{ + f->dump_unsigned("version", version); + f->dump_stream("stamp") << stamp; + f->dump_unsigned("osdmap_epoch", osdmap_epoch); + f->dump_unsigned("pg_scan_epoch", pg_scan); + f->dump_float("full_ratio", full_ratio); + f->dump_float("nearfull_ratio", nearfull_ratio); + + f->open_array_section("pg_stat_updates"); + for (map::const_iterator p = pg_stat_updates.begin(); p != pg_stat_updates.end(); ++p) { + f->open_object_section("pg_stat"); + f->dump_stream("pgid") << p->first; + p->second.dump(f); + f->close_section(); + } + f->close_section(); + + f->open_array_section("osd_stat_updates"); + for (map::const_iterator p = osd_stat_updates.begin(); p != osd_stat_updates.end(); ++p) { + f->open_object_section("osd_stat"); + f->dump_int("osd", p->first); + p->second.dump(f); + f->close_section(); + } + f->close_section(); + + f->open_array_section("osd_stat_removals"); + for (set::const_iterator p = osd_stat_rm.begin(); p != osd_stat_rm.end(); ++p) + f->dump_int("osd", *p); + f->close_section(); + + f->open_array_section("pg_removals"); + for (set::const_iterator p = pg_remove.begin(); p != pg_remove.end(); ++p) + f->dump_stream("pgid") << *p; + f->close_section(); +} + +void PGMap::Incremental::generate_test_instances(list& o) +{ + o.push_back(new Incremental); + o.push_back(new Incremental); + o.back()->version = 1; + o.back()->stamp = utime_t(123,345); + o.push_back(new Incremental); + o.back()->version = 2; + o.back()->pg_stat_updates[pg_t(1,2,3)] = pg_stat_t(); + o.back()->osd_stat_updates[5] = osd_stat_t(); + o.back()->osd_epochs[5] = 12; + o.push_back(new Incremental); + o.back()->version = 3; + o.back()->osdmap_epoch = 1; + o.back()->pg_scan = 2; + o.back()->full_ratio = .2; + o.back()->nearfull_ratio = .3; + o.back()->pg_stat_updates[pg_t(4,5,6)] = pg_stat_t(); + o.back()->osd_stat_updates[6] = osd_stat_t(); + o.back()->osd_epochs[6] = 12; + o.back()->pg_remove.insert(pg_t(1,2,3)); + o.back()->osd_stat_rm.insert(5); +} + + +// -- + +void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) +{ + assert(inc.version == version+1); + version++; + + utime_t delta_t; + delta_t = inc.stamp; + delta_t -= stamp; + stamp = inc.stamp; + + pool_stat_t pg_sum_old = pg_sum; + ceph::unordered_map pg_pool_sum_old; + + bool ratios_changed = false; + if (inc.full_ratio != full_ratio && inc.full_ratio != -1) { + full_ratio = inc.full_ratio; + ratios_changed = true; + } + if (inc.nearfull_ratio != nearfull_ratio && inc.nearfull_ratio != -1) { + nearfull_ratio = inc.nearfull_ratio; + ratios_changed = true; + } + if (ratios_changed) + redo_full_sets(); + + for (map::const_iterator p = inc.pg_stat_updates.begin(); + p != inc.pg_stat_updates.end(); + ++p) { + const pg_t &update_pg(p->first); + const pg_stat_t &update_stat(p->second); + + if (pg_pool_sum_old.count(update_pg.pool()) == 0) + pg_pool_sum_old[update_pg.pool()] = pg_pool_sum[update_pg.pool()]; + + ceph::unordered_map::iterator t = pg_stat.find(update_pg); + if (t == pg_stat.end()) { + ceph::unordered_map::value_type v(update_pg, update_stat); + pg_stat.insert(v); + } else { + stat_pg_sub(update_pg, t->second); + t->second = update_stat; + } + stat_pg_add(update_pg, update_stat); + } + assert(osd_stat.size() == osd_epochs.size()); + for (map::const_iterator p = + inc.get_osd_stat_updates().begin(); + p != inc.get_osd_stat_updates().end(); + ++p) { + int osd = p->first; + const osd_stat_t &new_stats(p->second); + + ceph::unordered_map::iterator t = osd_stat.find(osd); + if (t == osd_stat.end()) { + ceph::unordered_map::value_type v(osd, new_stats); + osd_stat.insert(v); + } else { + stat_osd_sub(t->second); + t->second = new_stats; + } + ceph::unordered_map::iterator i = osd_epochs.find(osd); + map::const_iterator j = inc.get_osd_epochs().find(osd); + assert(j != inc.get_osd_epochs().end()); + + if (i == osd_epochs.end()) + osd_epochs.insert(*j); + else + i->second = j->second; + + stat_osd_add(new_stats); + + // adjust [near]full status + register_nearfull_status(osd, new_stats); + } + set deleted_pools; + for (set::const_iterator p = inc.pg_remove.begin(); + p != inc.pg_remove.end(); + ++p) { + const pg_t &removed_pg(*p); + ceph::unordered_map::iterator s = pg_stat.find(removed_pg); + if (s != pg_stat.end()) { + stat_pg_sub(removed_pg, s->second); + pg_stat.erase(s); + } + if (removed_pg.ps() == 0) + deleted_pools.insert(removed_pg.pool()); + } + for (set::iterator p = deleted_pools.begin(); + p != deleted_pools.end(); + ++p) { + dout(20) << " deleted pool " << *p << dendl; + deleted_pool(*p); + } + + for (set::iterator p = inc.get_osd_stat_rm().begin(); + p != inc.get_osd_stat_rm().end(); + ++p) { + ceph::unordered_map::iterator t = osd_stat.find(*p); + if (t != osd_stat.end()) { + stat_osd_sub(t->second); + osd_stat.erase(t); + } + + // remove these old osds from full/nearfull set(s), too + nearfull_osds.erase(*p); + full_osds.erase(*p); + } + + // calculate a delta, and average over the last 2 deltas. + pool_stat_t d = pg_sum; + d.stats.sub(pg_sum_old.stats); + pg_sum_deltas.push_back(make_pair(d, delta_t)); + stamp_delta += delta_t; + + pg_sum_delta.stats.add(d.stats); + if (pg_sum_deltas.size() > (std::list< pair >::size_type)MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) { + pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats); + stamp_delta -= pg_sum_deltas.front().second; + pg_sum_deltas.pop_front(); + } + + update_pool_deltas(cct, inc.stamp, pg_pool_sum_old); + + if (inc.osdmap_epoch) + last_osdmap_epoch = inc.osdmap_epoch; + if (inc.pg_scan) + last_pg_scan = inc.pg_scan; + + min_last_epoch_clean = 0; // invalidate +} + +void PGMap::redo_full_sets() +{ + full_osds.clear(); + nearfull_osds.clear(); + for (ceph::unordered_map::iterator i = osd_stat.begin(); + i != osd_stat.end(); + ++i) { + register_nearfull_status(i->first, i->second); + } +} + +void PGMap::register_nearfull_status(int osd, const osd_stat_t& s) +{ + float ratio = ((float)s.kb_used) / ((float)s.kb); + + if (full_ratio > 0 && ratio > full_ratio) { + // full + full_osds.insert(osd); + nearfull_osds.erase(osd); + } else if (nearfull_ratio > 0 && ratio > nearfull_ratio) { + // nearfull + full_osds.erase(osd); + nearfull_osds.insert(osd); + } else { + // ok + full_osds.erase(osd); + nearfull_osds.erase(osd); + } +} + +void PGMap::calc_stats() +{ + num_pg = 0; + num_pg_active = 0; + num_osd = 0; + pg_pool_sum.clear(); + num_pg_by_pool.clear(); + pg_by_osd.clear(); + pg_sum = pool_stat_t(); + osd_sum = osd_stat_t(); + num_pg_by_state.clear(); + num_pg_by_osd.clear(); + + for (ceph::unordered_map::iterator p = pg_stat.begin(); + p != pg_stat.end(); + ++p) { + stat_pg_add(p->first, p->second); + } + for (ceph::unordered_map::iterator p = osd_stat.begin(); + p != osd_stat.end(); + ++p) + stat_osd_add(p->second); + + redo_full_sets(); + + min_last_epoch_clean = calc_min_last_epoch_clean(); +} + +void PGMap::update_pg(pg_t pgid, bufferlist& bl) +{ + bufferlist::iterator p = bl.begin(); + ceph::unordered_map::iterator s = pg_stat.find(pgid); + epoch_t old_lec = 0, lec; + if (s != pg_stat.end()) { + old_lec = s->second.get_effective_last_epoch_clean(); + stat_pg_update(pgid, s->second, p); + lec = s->second.get_effective_last_epoch_clean(); + } else { + pg_stat_t& r = pg_stat[pgid]; + ::decode(r, p); + stat_pg_add(pgid, r); + lec = r.get_effective_last_epoch_clean(); + } + + if (min_last_epoch_clean && + (lec < min_last_epoch_clean || // we did + (lec > min_last_epoch_clean && // we might + old_lec == min_last_epoch_clean) + )) + min_last_epoch_clean = 0; +} + +void PGMap::remove_pg(pg_t pgid) +{ + ceph::unordered_map::iterator s = pg_stat.find(pgid); + if (s != pg_stat.end()) { + if (min_last_epoch_clean && + s->second.get_effective_last_epoch_clean() == min_last_epoch_clean) + min_last_epoch_clean = 0; + stat_pg_sub(pgid, s->second); + pg_stat.erase(s); + } +} + +void PGMap::update_osd(int osd, bufferlist& bl) +{ + bufferlist::iterator p = bl.begin(); + ceph::unordered_map::iterator o = osd_stat.find(osd); + epoch_t old_lec = 0; + if (o != osd_stat.end()) { + ceph::unordered_map::iterator i = osd_epochs.find(osd); + if (i != osd_epochs.end()) + old_lec = i->second; + stat_osd_sub(o->second); + } + osd_stat_t& r = osd_stat[osd]; + ::decode(r, p); + stat_osd_add(r); + + // adjust [near]full status + register_nearfull_status(osd, r); + + // epoch? + if (!p.end()) { + epoch_t e; + ::decode(e, p); + + if (e < min_last_epoch_clean || + (e > min_last_epoch_clean && + old_lec == min_last_epoch_clean)) + min_last_epoch_clean = 0; + } else { + // WARNING: we are not refreshing min_last_epoch_clean! must be old store + // or old mon running. + } +} + +void PGMap::remove_osd(int osd) +{ + ceph::unordered_map::iterator o = osd_stat.find(osd); + if (o != osd_stat.end()) { + stat_osd_sub(o->second); + osd_stat.erase(o); + + // remove these old osds from full/nearfull set(s), too + nearfull_osds.erase(osd); + full_osds.erase(osd); + } +} + +void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, + bool sameosds) +{ + pg_pool_sum[pgid.pool()].add(s); + pg_sum.add(s); + + num_pg++; + num_pg_by_state[s.state]++; + + if ((s.state & PG_STATE_CREATING) && + s.parent_split_bits == 0) { + creating_pgs.insert(pgid); + if (s.acting_primary >= 0) { + creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid); + } + } + + if (s.state & PG_STATE_ACTIVE) { + ++num_pg_active; + } + + if (sameosds) + return; + + for (vector::const_iterator p = s.blocked_by.begin(); + p != s.blocked_by.end(); + ++p) { + ++blocked_by_sum[*p]; + } + + for (vector::const_iterator p = s.acting.begin(); p != s.acting.end(); ++p) { + pg_by_osd[*p].insert(pgid); + num_pg_by_osd[*p].acting++; + } + for (vector::const_iterator p = s.up.begin(); p != s.up.end(); ++p) { + pg_by_osd[*p].insert(pgid); + num_pg_by_osd[*p].up++; + } + + if (s.up_primary >= 0) { + num_pg_by_osd[s.up_primary].primary++; + } +} + +void PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, + bool sameosds) +{ + pool_stat_t& ps = pg_pool_sum[pgid.pool()]; + ps.sub(s); + if (ps.is_zero()) + pg_pool_sum.erase(pgid.pool()); + pg_sum.sub(s); + + num_pg--; + int end = --num_pg_by_state[s.state]; + assert(end >= 0); + if (end == 0) + num_pg_by_state.erase(s.state); + + if ((s.state & PG_STATE_CREATING) && + s.parent_split_bits == 0) { + creating_pgs.erase(pgid); + if (s.acting_primary >= 0) { + map >& r = creating_pgs_by_osd_epoch[s.acting_primary]; + r[s.mapping_epoch].erase(pgid); + if (r[s.mapping_epoch].empty()) + r.erase(s.mapping_epoch); + if (r.empty()) + creating_pgs_by_osd_epoch.erase(s.acting_primary); + } + } + + if (s.state & PG_STATE_ACTIVE) { + --num_pg_active; + } + + if (sameosds) + return; + + for (vector::const_iterator p = s.blocked_by.begin(); + p != s.blocked_by.end(); + ++p) { + ceph::unordered_map::iterator q = blocked_by_sum.find(*p); + assert(q != blocked_by_sum.end()); + --q->second; + if (q->second == 0) + blocked_by_sum.erase(q); + } + + for (vector::const_iterator p = s.acting.begin(); p != s.acting.end(); ++p) { + set& oset = pg_by_osd[*p]; + oset.erase(pgid); + if (oset.empty()) + pg_by_osd.erase(*p); + auto it = num_pg_by_osd.find(*p); + if (it != num_pg_by_osd.end() && it->second.acting > 0) + it->second.acting--; + } + for (vector::const_iterator p = s.up.begin(); p != s.up.end(); ++p) { + set& oset = pg_by_osd[*p]; + oset.erase(pgid); + if (oset.empty()) + pg_by_osd.erase(*p); + auto it = num_pg_by_osd.find(*p); + if (it != num_pg_by_osd.end() && it->second.up > 0) + it->second.up--; + } + + if (s.up_primary >= 0) { + auto it = num_pg_by_osd.find(s.up_primary); + if (it != num_pg_by_osd.end() && it->second.primary > 0) + it->second.primary--; + } +} + +void PGMap::stat_pg_update(const pg_t pgid, pg_stat_t& s, + bufferlist::iterator& blp) +{ + pg_stat_t n; + ::decode(n, blp); + + bool sameosds = s.acting == n.acting && s.up == n.up && s.blocked_by == n.blocked_by; @@ -604,6 +1431,11 @@ epoch_t PGMap::calc_min_last_epoch_clean() const return min; } +void PGMap::encode_digest(bufferlist& bl, uint64_t features) const +{ + PGMapDigest::encode(bl, features); +} + void PGMap::encode(bufferlist &bl, uint64_t features) const { if ((features & CEPH_FEATURE_MONENC) == 0) { @@ -671,7 +1503,25 @@ void PGMap::decode(bufferlist::iterator &bl) } DECODE_FINISH(bl); - calc_stats(); + calc_stats(); +} + +void PGMap::dirty_all(Incremental& inc) +{ + inc.osdmap_epoch = last_osdmap_epoch; + inc.pg_scan = last_pg_scan; + inc.full_ratio = full_ratio; + inc.nearfull_ratio = nearfull_ratio; + + for (ceph::unordered_map::const_iterator p = pg_stat.begin(); p != pg_stat.end(); ++p) { + inc.pg_stat_updates[p->first] = p->second; + } + for (ceph::unordered_map::const_iterator p = osd_stat.begin(); p != osd_stat.end(); ++p) { + assert(osd_epochs.count(p->first)); + inc.update_stat(p->first, + inc.get_osd_epochs().find(p->first)->second, + p->second); + } } void PGMap::dump(Formatter *f) const @@ -885,615 +1735,344 @@ void PGMap::dump_pool_stats(ostream& ss, bool header) const tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); } else { - tab.define_column("", TextTable::LEFT, TextTable::LEFT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - } - - for (ceph::unordered_map::const_iterator p = pg_pool_sum.begin(); - p != pg_pool_sum.end(); - ++p) { - tab << p->first - << p->second.stats.sum.num_objects - << p->second.stats.sum.num_objects_missing_on_primary - << p->second.stats.sum.num_objects_degraded - << p->second.stats.sum.num_objects_misplaced - << p->second.stats.sum.num_objects_unfound - << p->second.stats.sum.num_bytes - << p->second.log_size - << p->second.ondisk_log_size - << TextTable::endrow; - } - - ss << tab; -} - -void PGMap::dump_pg_sum_stats(ostream& ss, bool header) const -{ - TextTable tab; - - if (header) { - tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT); - tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); - } else { - tab.define_column("", TextTable::LEFT, TextTable::LEFT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("", TextTable::LEFT, TextTable::RIGHT); - }; - - tab << "sum" - << pg_sum.stats.sum.num_objects - << pg_sum.stats.sum.num_objects_missing_on_primary - << pg_sum.stats.sum.num_objects_degraded - << pg_sum.stats.sum.num_objects_misplaced - << pg_sum.stats.sum.num_objects_unfound - << pg_sum.stats.sum.num_bytes - << pg_sum.log_size - << pg_sum.ondisk_log_size - << TextTable::endrow; - - ss << tab; -} - -void PGMap::dump_osd_stats(ostream& ss) const -{ - TextTable tab; - - tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT); - tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("HB_PEERS", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("PG_SUM", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("PRIMARY_PG_SUM", TextTable::LEFT, TextTable::RIGHT); - - for (ceph::unordered_map::const_iterator p = osd_stat.begin(); - p != osd_stat.end(); - ++p) { - tab << p->first - << si_t(p->second.kb_used << 10) - << si_t(p->second.kb_avail << 10) - << si_t(p->second.kb << 10) - << p->second.hb_peers - << get_num_pg_by_osd(p->first) - << get_num_primary_pg_by_osd(p->first) - << TextTable::endrow; - } - - tab << "sum" - << si_t(osd_sum.kb_used << 10) - << si_t(osd_sum.kb_avail << 10) - << si_t(osd_sum.kb << 10) - << TextTable::endrow; - - ss << tab; -} - -void PGMap::dump_osd_sum_stats(ostream& ss) const -{ - TextTable tab; - - tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT); - tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT); - - tab << "sum" - << si_t(osd_sum.kb_used << 10) - << si_t(osd_sum.kb_avail << 10) - << si_t(osd_sum.kb << 10) - << TextTable::endrow; - - ss << tab; -} - -void PGMap::get_stuck_stats(int types, const utime_t cutoff, - ceph::unordered_map& stuck_pgs) const -{ - assert(types != 0); - for (ceph::unordered_map::const_iterator i = pg_stat.begin(); - i != pg_stat.end(); - ++i) { - utime_t val = cutoff; // don't care about >= cutoff so that is infinity - - if ((types & STUCK_INACTIVE) && !(i->second.state & PG_STATE_ACTIVE)) { - if (i->second.last_active < val) - val = i->second.last_active; - } - - if ((types & STUCK_UNCLEAN) && !(i->second.state & PG_STATE_CLEAN)) { - if (i->second.last_clean < val) - val = i->second.last_clean; - } - - if ((types & STUCK_DEGRADED) && (i->second.state & PG_STATE_DEGRADED)) { - if (i->second.last_undegraded < val) - val = i->second.last_undegraded; - } - - if ((types & STUCK_UNDERSIZED) && (i->second.state & PG_STATE_UNDERSIZED)) { - if (i->second.last_fullsized < val) - val = i->second.last_fullsized; - } - - if ((types & STUCK_STALE) && (i->second.state & PG_STATE_STALE)) { - if (i->second.last_unstale < val) - val = i->second.last_unstale; - } - - // val is now the earliest any of the requested stuck states began - if (val < cutoff) { - stuck_pgs[i->first] = i->second; - } - } -} - -bool PGMap::get_stuck_counts(const utime_t cutoff, map& note) const -{ - int inactive = 0; - int unclean = 0; - int degraded = 0; - int undersized = 0; - int stale = 0; - - for (ceph::unordered_map::const_iterator i = pg_stat.begin(); - i != pg_stat.end(); - ++i) { - if (! (i->second.state & PG_STATE_ACTIVE)) { - if (i->second.last_active < cutoff) - ++inactive; - } - if (! (i->second.state & PG_STATE_CLEAN)) { - if (i->second.last_clean < cutoff) - ++unclean; - } - if (i->second.state & PG_STATE_DEGRADED) { - if (i->second.last_undegraded < cutoff) - ++degraded; - } - if (i->second.state & PG_STATE_UNDERSIZED) { - if (i->second.last_fullsized < cutoff) - ++undersized; - } - if (i->second.state & PG_STATE_STALE) { - if (i->second.last_unstale < cutoff) - ++stale; - } - } - - if (inactive) - note["stuck inactive"] = inactive; - - if (unclean) - note["stuck unclean"] = unclean; - - if (undersized) - note["stuck undersized"] = undersized; - - if (degraded) - note["stuck degraded"] = degraded; - - if (stale) - note["stuck stale"] = stale; - - return inactive || unclean || undersized || degraded || stale; -} - -void PGMap::dump_stuck(Formatter *f, int types, utime_t cutoff) const -{ - ceph::unordered_map stuck_pg_stats; - get_stuck_stats(types, cutoff, stuck_pg_stats); - f->open_array_section("stuck_pg_stats"); - for (ceph::unordered_map::const_iterator i = stuck_pg_stats.begin(); - i != stuck_pg_stats.end(); - ++i) { - f->open_object_section("pg_stat"); - f->dump_stream("pgid") << i->first; - i->second.dump(f); - f->close_section(); - } - f->close_section(); -} - -void PGMap::dump_stuck_plain(ostream& ss, int types, utime_t cutoff) const -{ - ceph::unordered_map stuck_pg_stats; - get_stuck_stats(types, cutoff, stuck_pg_stats); - if (!stuck_pg_stats.empty()) - dump_pg_stats_plain(ss, stuck_pg_stats, true); -} - -int PGMap::dump_stuck_pg_stats( - stringstream &ds, - Formatter *f, - int threshold, - vector& args) const -{ - int stuck_types = 0; - - for (vector::iterator i = args.begin(); i != args.end(); ++i) { - if (*i == "inactive") - stuck_types |= PGMap::STUCK_INACTIVE; - else if (*i == "unclean") - stuck_types |= PGMap::STUCK_UNCLEAN; - else if (*i == "undersized") - stuck_types |= PGMap::STUCK_UNDERSIZED; - else if (*i == "degraded") - stuck_types |= PGMap::STUCK_DEGRADED; - else if (*i == "stale") - stuck_types |= PGMap::STUCK_STALE; - else { - ds << "Unknown type: " << *i << std::endl; - return -EINVAL; - } + tab.define_column("", TextTable::LEFT, TextTable::LEFT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); } - utime_t now(ceph_clock_now()); - utime_t cutoff = now - utime_t(threshold, 0); - - if (!f) { - dump_stuck_plain(ds, stuck_types, cutoff); - } else { - dump_stuck(f, stuck_types, cutoff); - f->flush(ds); + for (ceph::unordered_map::const_iterator p = pg_pool_sum.begin(); + p != pg_pool_sum.end(); + ++p) { + tab << p->first + << p->second.stats.sum.num_objects + << p->second.stats.sum.num_objects_missing_on_primary + << p->second.stats.sum.num_objects_degraded + << p->second.stats.sum.num_objects_misplaced + << p->second.stats.sum.num_objects_unfound + << p->second.stats.sum.num_bytes + << p->second.log_size + << p->second.ondisk_log_size + << TextTable::endrow; } - return 0; + ss << tab; } -void PGMap::dump_osd_perf_stats(Formatter *f) const -{ - f->open_array_section("osd_perf_infos"); - for (ceph::unordered_map::const_iterator i = osd_stat.begin(); - i != osd_stat.end(); - ++i) { - f->open_object_section("osd"); - f->dump_int("id", i->first); - { - f->open_object_section("perf_stats"); - i->second.os_perf_stat.dump(f); - f->close_section(); - } - f->close_section(); - } - f->close_section(); -} -void PGMap::print_osd_perf_stats(std::ostream *ss) const +void PGMap::dump_pg_sum_stats(ostream& ss, bool header) const { TextTable tab; - tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("commit_latency(ms)", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("apply_latency(ms)", TextTable::LEFT, TextTable::RIGHT); - for (ceph::unordered_map::const_iterator i = osd_stat.begin(); - i != osd_stat.end(); - ++i) { - tab << i->first; - tab << i->second.os_perf_stat.os_commit_latency; - tab << i->second.os_perf_stat.os_apply_latency; - tab << TextTable::endrow; - } - (*ss) << tab; -} -void PGMap::dump_osd_blocked_by_stats(Formatter *f) const -{ - f->open_array_section("osd_blocked_by_infos"); - for (ceph::unordered_map::const_iterator i = blocked_by_sum.begin(); - i != blocked_by_sum.end(); - ++i) { - f->open_object_section("osd"); - f->dump_int("id", i->first); - f->dump_int("num_blocked", i->second); - f->close_section(); - } - f->close_section(); + if (header) { + tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT); + tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); + } else { + tab.define_column("", TextTable::LEFT, TextTable::LEFT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("", TextTable::LEFT, TextTable::RIGHT); + }; + + tab << "sum" + << pg_sum.stats.sum.num_objects + << pg_sum.stats.sum.num_objects_missing_on_primary + << pg_sum.stats.sum.num_objects_degraded + << pg_sum.stats.sum.num_objects_misplaced + << pg_sum.stats.sum.num_objects_unfound + << pg_sum.stats.sum.num_bytes + << pg_sum.log_size + << pg_sum.ondisk_log_size + << TextTable::endrow; + + ss << tab; } -void PGMap::print_osd_blocked_by_stats(std::ostream *ss) const + +void PGMap::dump_osd_stats(ostream& ss) const { TextTable tab; - tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT); - tab.define_column("num_blocked", TextTable::LEFT, TextTable::RIGHT); - for (ceph::unordered_map::const_iterator i = blocked_by_sum.begin(); - i != blocked_by_sum.end(); - ++i) { - tab << i->first; - tab << i->second; - tab << TextTable::endrow; - } - (*ss) << tab; -} -void PGMap::recovery_summary(Formatter *f, list *psl, - const pool_stat_t& delta_sum) const -{ - if (delta_sum.stats.sum.num_objects_degraded && delta_sum.stats.sum.num_object_copies > 0) { - double pc = (double)delta_sum.stats.sum.num_objects_degraded / - (double)delta_sum.stats.sum.num_object_copies * (double)100.0; - char b[20]; - snprintf(b, sizeof(b), "%.3lf", pc); - if (f) { - f->dump_unsigned("degraded_objects", delta_sum.stats.sum.num_objects_degraded); - f->dump_unsigned("degraded_total", delta_sum.stats.sum.num_object_copies); - f->dump_float("degraded_ratio", pc / 100.0); - } else { - ostringstream ss; - ss << delta_sum.stats.sum.num_objects_degraded - << "/" << delta_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)"; - psl->push_back(ss.str()); - } - } - if (delta_sum.stats.sum.num_objects_misplaced && delta_sum.stats.sum.num_object_copies > 0) { - double pc = (double)delta_sum.stats.sum.num_objects_misplaced / - (double)delta_sum.stats.sum.num_object_copies * (double)100.0; - char b[20]; - snprintf(b, sizeof(b), "%.3lf", pc); - if (f) { - f->dump_unsigned("misplaced_objects", delta_sum.stats.sum.num_objects_misplaced); - f->dump_unsigned("misplaced_total", delta_sum.stats.sum.num_object_copies); - f->dump_float("misplaced_ratio", pc / 100.0); - } else { - ostringstream ss; - ss << delta_sum.stats.sum.num_objects_misplaced - << "/" << delta_sum.stats.sum.num_object_copies << " objects misplaced (" << b << "%)"; - psl->push_back(ss.str()); - } - } - if (delta_sum.stats.sum.num_objects_unfound && delta_sum.stats.sum.num_objects) { - double pc = (double)delta_sum.stats.sum.num_objects_unfound / - (double)delta_sum.stats.sum.num_objects * (double)100.0; - char b[20]; - snprintf(b, sizeof(b), "%.3lf", pc); - if (f) { - f->dump_unsigned("unfound_objects", delta_sum.stats.sum.num_objects_unfound); - f->dump_unsigned("unfound_total", delta_sum.stats.sum.num_objects); - f->dump_float("unfound_ratio", pc / 100.0); - } else { - ostringstream ss; - ss << delta_sum.stats.sum.num_objects_unfound - << "/" << delta_sum.stats.sum.num_objects << " unfound (" << b << "%)"; - psl->push_back(ss.str()); - } + tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT); + tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("HB_PEERS", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("PG_SUM", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("PRIMARY_PG_SUM", TextTable::LEFT, TextTable::RIGHT); + + for (ceph::unordered_map::const_iterator p = osd_stat.begin(); + p != osd_stat.end(); + ++p) { + tab << p->first + << si_t(p->second.kb_used << 10) + << si_t(p->second.kb_avail << 10) + << si_t(p->second.kb << 10) + << p->second.hb_peers + << get_num_pg_by_osd(p->first) + << get_num_primary_pg_by_osd(p->first) + << TextTable::endrow; } + + tab << "sum" + << si_t(osd_sum.kb_used << 10) + << si_t(osd_sum.kb_avail << 10) + << si_t(osd_sum.kb << 10) + << TextTable::endrow; + + ss << tab; } -void PGMap::recovery_rate_summary(Formatter *f, ostream *out, - const pool_stat_t& delta_sum, - utime_t delta_stamp) const +void PGMap::dump_osd_sum_stats(ostream& ss) const { - // make non-negative; we can get negative values if osds send - // uncommitted stats and then "go backward" or if they are just - // buggy/wrong. - pool_stat_t pos_delta = delta_sum; - pos_delta.floor(0); - if (pos_delta.stats.sum.num_objects_recovered || - pos_delta.stats.sum.num_bytes_recovered || - pos_delta.stats.sum.num_keys_recovered) { - int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)delta_stamp; - int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)delta_stamp; - int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)delta_stamp; - if (f) { - f->dump_int("recovering_objects_per_sec", objps); - f->dump_int("recovering_bytes_per_sec", bps); - f->dump_int("recovering_keys_per_sec", kps); - f->dump_int("num_objects_recovered", pos_delta.stats.sum.num_objects_recovered); - f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered); - f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered); - } else { - *out << pretty_si_t(bps) << "B/s"; - if (pos_delta.stats.sum.num_keys_recovered) - *out << ", " << pretty_si_t(kps) << "keys/s"; - *out << ", " << pretty_si_t(objps) << "objects/s"; - } - } + TextTable tab; + + tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT); + tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT); + + tab << "sum" + << si_t(osd_sum.kb_used << 10) + << si_t(osd_sum.kb_avail << 10) + << si_t(osd_sum.kb << 10) + << TextTable::endrow; + + ss << tab; } -void PGMap::overall_recovery_rate_summary(Formatter *f, ostream *out) const +void PGMap::get_stuck_stats(int types, const utime_t cutoff, + ceph::unordered_map& stuck_pgs) const { - recovery_rate_summary(f, out, pg_sum_delta, stamp_delta); -} + assert(types != 0); + for (ceph::unordered_map::const_iterator i = pg_stat.begin(); + i != pg_stat.end(); + ++i) { + utime_t val = cutoff; // don't care about >= cutoff so that is infinity + + if ((types & STUCK_INACTIVE) && !(i->second.state & PG_STATE_ACTIVE)) { + if (i->second.last_active < val) + val = i->second.last_active; + } -void PGMap::overall_recovery_summary(Formatter *f, list *psl) const -{ - recovery_summary(f, psl, pg_sum); -} + if ((types & STUCK_UNCLEAN) && !(i->second.state & PG_STATE_CLEAN)) { + if (i->second.last_clean < val) + val = i->second.last_clean; + } -void PGMap::pool_recovery_rate_summary(Formatter *f, ostream *out, - uint64_t poolid) const -{ - ceph::unordered_map >::const_iterator p = - per_pool_sum_delta.find(poolid); - if (p == per_pool_sum_delta.end()) - return; + if ((types & STUCK_DEGRADED) && (i->second.state & PG_STATE_DEGRADED)) { + if (i->second.last_undegraded < val) + val = i->second.last_undegraded; + } - ceph::unordered_map::const_iterator ts = - per_pool_sum_deltas_stamps.find(p->first); - assert(ts != per_pool_sum_deltas_stamps.end()); - recovery_rate_summary(f, out, p->second.first, ts->second); -} + if ((types & STUCK_UNDERSIZED) && (i->second.state & PG_STATE_UNDERSIZED)) { + if (i->second.last_fullsized < val) + val = i->second.last_fullsized; + } -void PGMap::pool_recovery_summary(Formatter *f, list *psl, - uint64_t poolid) const -{ - ceph::unordered_map >::const_iterator p = - per_pool_sum_delta.find(poolid); - if (p == per_pool_sum_delta.end()) - return; + if ((types & STUCK_STALE) && (i->second.state & PG_STATE_STALE)) { + if (i->second.last_unstale < val) + val = i->second.last_unstale; + } - recovery_summary(f, psl, p->second.first); + // val is now the earliest any of the requested stuck states began + if (val < cutoff) { + stuck_pgs[i->first] = i->second; + } + } } -void PGMap::client_io_rate_summary(Formatter *f, ostream *out, - const pool_stat_t& delta_sum, - utime_t delta_stamp) const +bool PGMap::get_stuck_counts(const utime_t cutoff, map& note) const { - pool_stat_t pos_delta = delta_sum; - pos_delta.floor(0); - if (pos_delta.stats.sum.num_rd || - pos_delta.stats.sum.num_wr) { - if (pos_delta.stats.sum.num_rd) { - int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)delta_stamp; - if (f) { - f->dump_int("read_bytes_sec", rd); - } else { - *out << pretty_si_t(rd) << "B/s rd, "; - } + int inactive = 0; + int unclean = 0; + int degraded = 0; + int undersized = 0; + int stale = 0; + + for (ceph::unordered_map::const_iterator i = pg_stat.begin(); + i != pg_stat.end(); + ++i) { + if (! (i->second.state & PG_STATE_ACTIVE)) { + if (i->second.last_active < cutoff) + ++inactive; } - if (pos_delta.stats.sum.num_wr) { - int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)delta_stamp; - if (f) { - f->dump_int("write_bytes_sec", wr); - } else { - *out << pretty_si_t(wr) << "B/s wr, "; - } + if (! (i->second.state & PG_STATE_CLEAN)) { + if (i->second.last_clean < cutoff) + ++unclean; } - int64_t iops_rd = pos_delta.stats.sum.num_rd / (double)delta_stamp; - int64_t iops_wr = pos_delta.stats.sum.num_wr / (double)delta_stamp; - if (f) { - f->dump_int("read_op_per_sec", iops_rd); - f->dump_int("write_op_per_sec", iops_wr); - } else { - *out << pretty_si_t(iops_rd) << "op/s rd, " << pretty_si_t(iops_wr) << "op/s wr"; + if (i->second.state & PG_STATE_DEGRADED) { + if (i->second.last_undegraded < cutoff) + ++degraded; + } + if (i->second.state & PG_STATE_UNDERSIZED) { + if (i->second.last_fullsized < cutoff) + ++undersized; + } + if (i->second.state & PG_STATE_STALE) { + if (i->second.last_unstale < cutoff) + ++stale; } } + + if (inactive) + note["stuck inactive"] = inactive; + + if (unclean) + note["stuck unclean"] = unclean; + + if (undersized) + note["stuck undersized"] = undersized; + + if (degraded) + note["stuck degraded"] = degraded; + + if (stale) + note["stuck stale"] = stale; + + return inactive || unclean || undersized || degraded || stale; } -void PGMap::overall_client_io_rate_summary(Formatter *f, ostream *out) const +void PGMap::dump_stuck(Formatter *f, int types, utime_t cutoff) const { - client_io_rate_summary(f, out, pg_sum_delta, stamp_delta); + ceph::unordered_map stuck_pg_stats; + get_stuck_stats(types, cutoff, stuck_pg_stats); + f->open_array_section("stuck_pg_stats"); + for (ceph::unordered_map::const_iterator i = stuck_pg_stats.begin(); + i != stuck_pg_stats.end(); + ++i) { + f->open_object_section("pg_stat"); + f->dump_stream("pgid") << i->first; + i->second.dump(f); + f->close_section(); + } + f->close_section(); } -void PGMap::pool_client_io_rate_summary(Formatter *f, ostream *out, - uint64_t poolid) const +void PGMap::dump_stuck_plain(ostream& ss, int types, utime_t cutoff) const { - ceph::unordered_map >::const_iterator p = - per_pool_sum_delta.find(poolid); - if (p == per_pool_sum_delta.end()) - return; - - ceph::unordered_map::const_iterator ts = - per_pool_sum_deltas_stamps.find(p->first); - assert(ts != per_pool_sum_deltas_stamps.end()); - client_io_rate_summary(f, out, p->second.first, ts->second); + ceph::unordered_map stuck_pg_stats; + get_stuck_stats(types, cutoff, stuck_pg_stats); + if (!stuck_pg_stats.empty()) + dump_pg_stats_plain(ss, stuck_pg_stats, true); } -void PGMap::cache_io_rate_summary(Formatter *f, ostream *out, - const pool_stat_t& delta_sum, - utime_t delta_stamp) const +int PGMap::dump_stuck_pg_stats( + stringstream &ds, + Formatter *f, + int threshold, + vector& args) const { - pool_stat_t pos_delta = delta_sum; - pos_delta.floor(0); - bool have_output = false; + int stuck_types = 0; - if (pos_delta.stats.sum.num_flush) { - int64_t flush = (pos_delta.stats.sum.num_flush_kb << 10) / (double)delta_stamp; - if (f) { - f->dump_int("flush_bytes_sec", flush); - } else { - *out << pretty_si_t(flush) << "B/s flush"; - have_output = true; - } - } - if (pos_delta.stats.sum.num_evict) { - int64_t evict = (pos_delta.stats.sum.num_evict_kb << 10) / (double)delta_stamp; - if (f) { - f->dump_int("evict_bytes_sec", evict); - } else { - if (have_output) - *out << ", "; - *out << pretty_si_t(evict) << "B/s evict"; - have_output = true; - } - } - if (pos_delta.stats.sum.num_promote) { - int64_t promote = pos_delta.stats.sum.num_promote / (double)delta_stamp; - if (f) { - f->dump_int("promote_op_per_sec", promote); - } else { - if (have_output) - *out << ", "; - *out << pretty_si_t(promote) << "op/s promote"; - have_output = true; - } - } - if (pos_delta.stats.sum.num_flush_mode_low) { - if (f) { - f->dump_int("num_flush_mode_low", pos_delta.stats.sum.num_flush_mode_low); - } else { - if (have_output) - *out << ", "; - *out << pretty_si_t(pos_delta.stats.sum.num_flush_mode_low) << "PG(s) flushing"; - have_output = true; + for (vector::iterator i = args.begin(); i != args.end(); ++i) { + if (*i == "inactive") + stuck_types |= PGMap::STUCK_INACTIVE; + else if (*i == "unclean") + stuck_types |= PGMap::STUCK_UNCLEAN; + else if (*i == "undersized") + stuck_types |= PGMap::STUCK_UNDERSIZED; + else if (*i == "degraded") + stuck_types |= PGMap::STUCK_DEGRADED; + else if (*i == "stale") + stuck_types |= PGMap::STUCK_STALE; + else { + ds << "Unknown type: " << *i << std::endl; + return -EINVAL; } } - if (pos_delta.stats.sum.num_flush_mode_high) { - if (f) { - f->dump_int("num_flush_mode_high", pos_delta.stats.sum.num_flush_mode_high); - } else { - if (have_output) - *out << ", "; - *out << pretty_si_t(pos_delta.stats.sum.num_flush_mode_high) << "PG(s) flushing (high)"; - have_output = true; - } + + utime_t now(ceph_clock_now()); + utime_t cutoff = now - utime_t(threshold, 0); + + if (!f) { + dump_stuck_plain(ds, stuck_types, cutoff); + } else { + dump_stuck(f, stuck_types, cutoff); + f->flush(ds); } - if (pos_delta.stats.sum.num_evict_mode_some) { - if (f) { - f->dump_int("num_evict_mode_some", pos_delta.stats.sum.num_evict_mode_some); - } else { - if (have_output) - *out << ", "; - *out << pretty_si_t(pos_delta.stats.sum.num_evict_mode_some) << "PG(s) evicting"; - have_output = true; + + return 0; +} + +void PGMap::dump_osd_perf_stats(Formatter *f) const +{ + f->open_array_section("osd_perf_infos"); + for (ceph::unordered_map::const_iterator i = osd_stat.begin(); + i != osd_stat.end(); + ++i) { + f->open_object_section("osd"); + f->dump_int("id", i->first); + { + f->open_object_section("perf_stats"); + i->second.os_perf_stat.dump(f); + f->close_section(); } + f->close_section(); } - if (pos_delta.stats.sum.num_evict_mode_full) { - if (f) { - f->dump_int("num_evict_mode_full", pos_delta.stats.sum.num_evict_mode_full); - } else { - if (have_output) - *out << ", "; - *out << pretty_si_t(pos_delta.stats.sum.num_evict_mode_full) << "PG(s) evicting (full)"; - } + f->close_section(); +} +void PGMap::print_osd_perf_stats(std::ostream *ss) const +{ + TextTable tab; + tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("commit_latency(ms)", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("apply_latency(ms)", TextTable::LEFT, TextTable::RIGHT); + for (ceph::unordered_map::const_iterator i = osd_stat.begin(); + i != osd_stat.end(); + ++i) { + tab << i->first; + tab << i->second.os_perf_stat.os_commit_latency; + tab << i->second.os_perf_stat.os_apply_latency; + tab << TextTable::endrow; } + (*ss) << tab; } -void PGMap::overall_cache_io_rate_summary(Formatter *f, ostream *out) const +void PGMap::dump_osd_blocked_by_stats(Formatter *f) const { - cache_io_rate_summary(f, out, pg_sum_delta, stamp_delta); + f->open_array_section("osd_blocked_by_infos"); + for (ceph::unordered_map::const_iterator i = blocked_by_sum.begin(); + i != blocked_by_sum.end(); + ++i) { + f->open_object_section("osd"); + f->dump_int("id", i->first); + f->dump_int("num_blocked", i->second); + f->close_section(); + } + f->close_section(); } - -void PGMap::pool_cache_io_rate_summary(Formatter *f, ostream *out, - uint64_t poolid) const +void PGMap::print_osd_blocked_by_stats(std::ostream *ss) const { - ceph::unordered_map >::const_iterator p = - per_pool_sum_delta.find(poolid); - if (p == per_pool_sum_delta.end()) - return; - - ceph::unordered_map::const_iterator ts = - per_pool_sum_deltas_stamps.find(p->first); - assert(ts != per_pool_sum_deltas_stamps.end()); - cache_io_rate_summary(f, out, p->second.first, ts->second); + TextTable tab; + tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT); + tab.define_column("num_blocked", TextTable::LEFT, TextTable::RIGHT); + for (ceph::unordered_map::const_iterator i = blocked_by_sum.begin(); + i != blocked_by_sum.end(); + ++i) { + tab << i->first; + tab << i->second; + tab << TextTable::endrow; + } + (*ss) << tab; } + /** * update aggregated delta * @@ -1603,212 +2182,14 @@ void PGMap::update_pool_deltas(CephContext *cct, const utime_t ts, for (ceph::unordered_map::const_iterator it = pg_pool_sum_old.begin(); it != pg_pool_sum_old.end(); ++it) { update_one_pool_delta(cct, ts, it->first, it->second); - } -} - -void PGMap::clear_delta() -{ - pg_sum_delta = pool_stat_t(); - pg_sum_deltas.clear(); - stamp_delta = utime_t(); -} - -void PGMap::print_summary(Formatter *f, ostream *out) const -{ - if (f) - f->open_array_section("pgs_by_state"); - - // list is descending numeric order (by count) - multimap state_by_count; // count -> state - for (ceph::unordered_map::const_iterator p = num_pg_by_state.begin(); - p != num_pg_by_state.end(); - ++p) { - state_by_count.insert(make_pair(p->second, p->first)); - } - if (f) { - for (multimap::reverse_iterator p = state_by_count.rbegin(); - p != state_by_count.rend(); - ++p) - { - f->open_object_section("pgs_by_state_element"); - f->dump_string("state_name", pg_state_string(p->second)); - f->dump_unsigned("count", p->first); - f->close_section(); - } - } - if (f) - f->close_section(); - - if (f) { - f->dump_unsigned("num_pgs", num_pg); - f->dump_unsigned("num_pools", pg_pool_sum.size()); - f->dump_unsigned("num_objects", pg_sum.stats.sum.num_objects); - f->dump_unsigned("data_bytes", pg_sum.stats.sum.num_bytes); - f->dump_unsigned("bytes_used", osd_sum.kb_used * 1024ull); - f->dump_unsigned("bytes_avail", osd_sum.kb_avail * 1024ull); - f->dump_unsigned("bytes_total", osd_sum.kb * 1024ull); - } else { - *out << " pools: " << pg_pool_sum.size() << " pools, " - << num_pg << " pgs\n"; - *out << " objects: " << si_t(pg_sum.stats.sum.num_objects) << " objects, " - << prettybyte_t(pg_sum.stats.sum.num_bytes) << "\n"; - *out << " usage: " - << kb_t(osd_sum.kb_used) << " used, " - << kb_t(osd_sum.kb_avail) << " / " - << kb_t(osd_sum.kb) << " avail\n"; - *out << " pgs: "; - } - - bool pad = false; - if (num_pg_active < num_pg) { - float p = (float)num_pg_active / (float)num_pg; - if (f) { - f->dump_float("active_pgs_ratio", p); - } else { - char b[20]; - snprintf(b, sizeof(b), "%.3f", (1.0 - p) * 100.0); - *out << b << "% pgs inactive\n"; - pad = true; - } - } - - list sl; - overall_recovery_summary(f, &sl); - if (!f && !sl.empty()) { - for (list::iterator p = sl.begin(); p != sl.end(); ++p) { - if (pad) { - *out << " "; - } - *out << *p << "\n"; - pad = true; - } - } - sl.clear(); - - if (!f) { - unsigned max_width = 1; - for (multimap::reverse_iterator p = state_by_count.rbegin(); - p != state_by_count.rend(); - ++p) - { - std::stringstream ss; - ss << p->first; - max_width = MAX(ss.str().size(), max_width); - } - - for (multimap::reverse_iterator p = state_by_count.rbegin(); - p != state_by_count.rend(); - ++p) - { - if (pad) { - *out << " "; - } - pad = true; - out->setf(std::ios::left); - *out << std::setw(max_width) << p->first - << " " << pg_state_string(p->second) << "\n"; - out->unsetf(std::ios::left); - } - } - - ostringstream ss_rec_io; - overall_recovery_rate_summary(f, &ss_rec_io); - ostringstream ss_client_io; - overall_client_io_rate_summary(f, &ss_client_io); - ostringstream ss_cache_io; - overall_cache_io_rate_summary(f, &ss_cache_io); - - if (!f && (ss_client_io.str().length() || ss_rec_io.str().length() - || ss_cache_io.str().length())) { - *out << "\n \n"; - *out << " io:\n"; - } - - if (!f && ss_client_io.str().length()) - *out << " client: " << ss_client_io.str() << "\n"; - if (!f && ss_rec_io.str().length()) - *out << " recovery: " << ss_rec_io.str() << "\n"; - if (!f && ss_cache_io.str().length()) - *out << " cache: " << ss_cache_io.str() << "\n"; -} - -void PGMap::print_oneline_summary(Formatter *f, ostream *out) const -{ - std::stringstream ss; - - if (f) - f->open_array_section("num_pg_by_state"); - for (ceph::unordered_map::const_iterator p = num_pg_by_state.begin(); - p != num_pg_by_state.end(); - ++p) { - if (f) { - f->open_object_section("state"); - f->dump_string("name", pg_state_string(p->first)); - f->dump_unsigned("num", p->second); - f->close_section(); - } - if (p != num_pg_by_state.begin()) - ss << ", "; - ss << p->second << " " << pg_state_string(p->first); - } - if (f) - f->close_section(); - - string states = ss.str(); - if (out) - *out << num_pg << " pgs: " - << states << "; " - << prettybyte_t(pg_sum.stats.sum.num_bytes) << " data, " - << kb_t(osd_sum.kb_used) << " used, " - << kb_t(osd_sum.kb_avail) << " / " - << kb_t(osd_sum.kb) << " avail"; - if (f) { - f->dump_unsigned("num_pgs", num_pg); - f->dump_unsigned("num_bytes", pg_sum.stats.sum.num_bytes); - f->dump_unsigned("raw_bytes_used", osd_sum.kb_used << 10); - f->dump_unsigned("raw_bytes_avail", osd_sum.kb_avail << 10); - f->dump_unsigned("raw_bytes", osd_sum.kb << 10); - } - - // make non-negative; we can get negative values if osds send - // uncommitted stats and then "go backward" or if they are just - // buggy/wrong. - pool_stat_t pos_delta = pg_sum_delta; - pos_delta.floor(0); - if (pos_delta.stats.sum.num_rd || - pos_delta.stats.sum.num_wr) { - if (out) - *out << "; "; - if (pos_delta.stats.sum.num_rd) { - int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta; - if (out) - *out << pretty_si_t(rd) << "B/s rd, "; - if (f) - f->dump_unsigned("read_bytes_sec", rd); - } - if (pos_delta.stats.sum.num_wr) { - int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta; - if (out) - *out << pretty_si_t(wr) << "B/s wr, "; - if (f) - f->dump_unsigned("write_bytes_sec", wr); - } - int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta; - if (out) - *out << pretty_si_t(iops) << "op/s"; - if (f) - f->dump_unsigned("io_sec", iops); - } - - list sl; - overall_recovery_summary(f, &sl); - if (out) - for (list::iterator p = sl.begin(); p != sl.end(); ++p) - *out << "; " << *p; - std::stringstream ssr; - overall_recovery_rate_summary(f, &ssr); - if (out && ssr.str().length()) - *out << "; " << ssr.str() << " recovering"; + } +} + +void PGMap::clear_delta() +{ + pg_sum_delta = pool_stat_t(); + pg_sum_deltas.clear(); + stamp_delta = utime_t(); } void PGMap::generate_test_instances(list& o) @@ -1916,268 +2297,6 @@ void PGMap::dump_filtered_pg_stats(ostream& ss, set& pgs) const ss << tab; } -int64_t PGMap::get_rule_avail(const OSDMap& osdmap, int ruleno) const -{ - map wm; - int r = osdmap.crush->get_rule_weight_osd_map(ruleno, &wm); - if (r < 0) { - return r; - } - if (wm.empty()) { - return 0; - } - - float fratio; - if (osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS && - osdmap.get_full_ratio() > 0) { - fratio = osdmap.get_full_ratio(); - } else if (full_ratio > 0) { - fratio = full_ratio; - } else { - // this shouldn't really happen - fratio = g_conf->mon_osd_full_ratio; - if (fratio > 1.0) fratio /= 100; - } - - int64_t min = -1; - for (map::iterator p = wm.begin(); p != wm.end(); ++p) { - ceph::unordered_map::const_iterator osd_info = - osd_stat.find(p->first); - if (osd_info != osd_stat.end()) { - if (osd_info->second.kb == 0 || p->second == 0) { - // osd must be out, hence its stats have been zeroed - // (unless we somehow managed to have a disk with size 0...) - // - // (p->second == 0), if osd weight is 0, no need to - // calculate proj below. - continue; - } - double unusable = (double)osd_info->second.kb * - (1.0 - fratio); - double avail = MAX(0.0, (double)osd_info->second.kb_avail - unusable); - avail *= 1024.0; - int64_t proj = (int64_t)(avail / (double)p->second); - if (min < 0 || proj < min) { - min = proj; - } - } else { - dout(0) << "Cannot get stat of OSD " << p->first << dendl; - } - } - return min; -} - -inline std::string percentify(const float& a) { - std::stringstream ss; - if (a < 0.01) - ss << "0"; - else - ss << std::fixed << std::setprecision(2) << a; - return ss.str(); -} - -void PGMap::dump_pool_stats(const OSDMap &osd_map, stringstream *ss, - Formatter *f, bool verbose) const -{ - TextTable tbl; - - if (f) { - f->open_array_section("pools"); - } else { - tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); - tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT); - if (verbose) { - tbl.define_column("QUOTA OBJECTS", TextTable::LEFT, TextTable::LEFT); - tbl.define_column("QUOTA BYTES", TextTable::LEFT, TextTable::LEFT); - } - - tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("%USED", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("MAX AVAIL", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); - if (verbose) { - tbl.define_column("DIRTY", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("READ", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("WRITE", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT); - } - } - - map avail_by_rule; - for (map::const_iterator p = osd_map.get_pools().begin(); - p != osd_map.get_pools().end(); ++p) { - int64_t pool_id = p->first; - if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0)) - continue; - const string& pool_name = osd_map.get_pool_name(pool_id); - const pool_stat_t &stat = pg_pool_sum.at(pool_id); - - const pg_pool_t *pool = osd_map.get_pg_pool(pool_id); - int ruleno = osd_map.crush->find_rule(pool->get_crush_ruleset(), - pool->get_type(), - pool->get_size()); - int64_t avail; - float raw_used_rate; - if (avail_by_rule.count(ruleno) == 0) { - avail = get_rule_avail(osd_map, ruleno); - if (avail < 0) - avail = 0; - avail_by_rule[ruleno] = avail; - } else { - avail = avail_by_rule[ruleno]; - } - switch (pool->get_type()) { - case pg_pool_t::TYPE_REPLICATED: - avail /= pool->get_size(); - raw_used_rate = pool->get_size(); - break; - case pg_pool_t::TYPE_ERASURE: - { - auto& ecp = - osd_map.get_erasure_code_profile(pool->erasure_code_profile); - auto pm = ecp.find("m"); - auto pk = ecp.find("k"); - if (pm != ecp.end() && pk != ecp.end()) { - int k = atoi(pk->second.c_str()); - int m = atoi(pm->second.c_str()); - avail = avail * k / (m + k); - raw_used_rate = (float)(m + k) / k; - } else { - raw_used_rate = 0.0; - } - } - break; - default: - assert(0 == "unrecognized pool type"); - } - - if (f) { - f->open_object_section("pool"); - f->dump_string("name", pool_name); - f->dump_int("id", pool_id); - f->open_object_section("stats"); - } else { - tbl << pool_name - << pool_id; - if (verbose) { - if (pool->quota_max_objects == 0) - tbl << "N/A"; - else - tbl << si_t(pool->quota_max_objects); - - if (pool->quota_max_bytes == 0) - tbl << "N/A"; - else - tbl << si_t(pool->quota_max_bytes); - } - - } - dump_object_stat_sum(tbl, f, stat.stats.sum, avail, raw_used_rate, verbose, pool); - if (f) - f->close_section(); // stats - else - tbl << TextTable::endrow; - - if (f) - f->close_section(); // pool - } - if (f) - f->close_section(); - else { - assert(ss != nullptr); - *ss << "POOLS:\n"; - tbl.set_indent(4); - *ss << tbl; - } -} - -void PGMap::dump_fs_stats( - stringstream *ss, Formatter *f, bool verbose) const -{ - if (f) { - f->open_object_section("stats"); - f->dump_int("total_bytes", osd_sum.kb * 1024ull); - f->dump_int("total_used_bytes", osd_sum.kb_used * 1024ull); - f->dump_int("total_avail_bytes", osd_sum.kb_avail * 1024ull); - if (verbose) { - f->dump_int("total_objects", pg_sum.stats.sum.num_objects); - } - f->close_section(); - } else { - assert(ss != nullptr); - TextTable tbl; - tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT); - tbl.define_column("%RAW USED", TextTable::LEFT, TextTable::RIGHT); - if (verbose) { - tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); - } - tbl << stringify(si_t(osd_sum.kb*1024)) - << stringify(si_t(osd_sum.kb_avail*1024)) - << stringify(si_t(osd_sum.kb_used*1024)); - float used = 0.0; - if (osd_sum.kb > 0) { - used = ((float)osd_sum.kb_used / osd_sum.kb); - } - tbl << percentify(used*100); - if (verbose) { - tbl << stringify(si_t(pg_sum.stats.sum.num_objects)); - } - tbl << TextTable::endrow; - - *ss << "GLOBAL:\n"; - tbl.set_indent(4); - *ss << tbl; - } -} - -void PGMap::dump_object_stat_sum(TextTable &tbl, Formatter *f, - const object_stat_sum_t &sum, uint64_t avail, - float raw_used_rate, bool verbose, - const pg_pool_t *pool) -{ - float curr_object_copies_rate = 0.0; - if (sum.num_object_copies > 0) - curr_object_copies_rate = (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies; - - if (f) { - f->dump_int("kb_used", SHIFT_ROUND_UP(sum.num_bytes, 10)); - f->dump_int("bytes_used", sum.num_bytes); - f->dump_unsigned("max_avail", avail); - f->dump_int("objects", sum.num_objects); - if (verbose) { - f->dump_int("quota_objects", pool->quota_max_objects); - f->dump_int("quota_bytes", pool->quota_max_bytes); - f->dump_int("dirty", sum.num_objects_dirty); - f->dump_int("rd", sum.num_rd); - f->dump_int("rd_bytes", sum.num_rd_kb * 1024ull); - f->dump_int("wr", sum.num_wr); - f->dump_int("wr_bytes", sum.num_wr_kb * 1024ull); - f->dump_int("raw_bytes_used", sum.num_bytes * raw_used_rate * curr_object_copies_rate); - } - } else { - tbl << stringify(si_t(sum.num_bytes)); - float used = 0.0; - if (avail) { - used = sum.num_bytes * curr_object_copies_rate; - used /= used + avail; - } else if (sum.num_bytes) { - used = 1.0; - } - tbl << percentify(used*100); - tbl << si_t(avail); - tbl << sum.num_objects; - if (verbose) { - tbl << stringify(si_t(sum.num_objects_dirty)) - << stringify(si_t(sum.num_rd)) - << stringify(si_t(sum.num_wr)) - << stringify(si_t(sum.num_bytes * raw_used_rate * curr_object_copies_rate)); - } - } -} - - int process_pg_map_command( const string& orig_prefix, const map& orig_cmdmap, diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index 556cc8929bc..fee4313bcc4 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -32,14 +32,159 @@ namespace ceph { class Formatter; } -class PGMap { +class PGMapDigest { +public: + virtual ~PGMapDigest() {} + + ceph::unordered_map osd_stat; + + // aggregate state, populated by PGMap child + int64_t num_pg = 0, num_osd = 0; + int64_t num_pg_active = 0; + ceph::unordered_map pg_pool_sum; + pool_stat_t pg_sum; + osd_stat_t osd_sum; + ceph::unordered_map num_pg_by_state; + struct pg_count { + int acting = 0; + int up = 0; + int primary = 0; + void encode(bufferlist& bl) const { + ::encode(acting, bl); + ::encode(up, bl); + ::encode(primary, bl); + } + void decode(bufferlist::iterator& p) { + ::decode(acting, p); + ::decode(up, p); + ::decode(primary, p); + } + }; + ceph::unordered_map num_pg_by_osd; + + void print_summary(Formatter *f, ostream *out) const; + void print_oneline_summary(Formatter *f, ostream *out) const; + + // recent deltas, and summation + /** + * keep track of last deltas for each pool, calculated using + * @p pg_pool_sum as baseline. + */ + ceph::unordered_map > > per_pool_sum_deltas; + /** + * keep track of per-pool timestamp deltas, according to last update on + * each pool. + */ + ceph::unordered_map per_pool_sum_deltas_stamps; + /** + * keep track of sum deltas, per-pool, taking into account any previous + * deltas existing in @p per_pool_sum_deltas. The utime_t as second member + * of the pair is the timestamp refering to the last update (i.e., the first + * member of the pair) for a given pool. + */ + ceph::unordered_map > per_pool_sum_delta; + + list< pair > pg_sum_deltas; + pool_stat_t pg_sum_delta; + utime_t stamp_delta; + + + void recovery_summary(Formatter *f, list *psl, + const pool_stat_t& delta_sum) const; + void overall_recovery_summary(Formatter *f, list *psl) const; + void pool_recovery_summary(Formatter *f, list *psl, + uint64_t poolid) const; + void recovery_rate_summary(Formatter *f, ostream *out, + const pool_stat_t& delta_sum, + utime_t delta_stamp) const; + void overall_recovery_rate_summary(Formatter *f, ostream *out) const; + void pool_recovery_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const; + /** + * Obtain a formatted/plain output for client I/O, source from stats for a + * given @p delta_sum pool over a given @p delta_stamp period of time. + */ + void client_io_rate_summary(Formatter *f, ostream *out, + const pool_stat_t& delta_sum, + utime_t delta_stamp) const; + /** + * Obtain a formatted/plain output for the overall client I/O, which is + * calculated resorting to @p pg_sum_delta and @p stamp_delta. + */ + void overall_client_io_rate_summary(Formatter *f, ostream *out) const; + /** + * Obtain a formatted/plain output for client I/O over a given pool + * with id @p pool_id. We will then obtain pool-specific data + * from @p per_pool_sum_delta. + */ + void pool_client_io_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const; + /** + * Obtain a formatted/plain output for cache tier IO, source from stats for a + * given @p delta_sum pool over a given @p delta_stamp period of time. + */ + void cache_io_rate_summary(Formatter *f, ostream *out, + const pool_stat_t& delta_sum, + utime_t delta_stamp) const; + /** + * Obtain a formatted/plain output for the overall cache tier IO, which is + * calculated resorting to @p pg_sum_delta and @p stamp_delta. + */ + void overall_cache_io_rate_summary(Formatter *f, ostream *out) const; + /** + * Obtain a formatted/plain output for cache tier IO over a given pool + * with id @p pool_id. We will then obtain pool-specific data + * from @p per_pool_sum_delta. + */ + void pool_cache_io_rate_summary(Formatter *f, ostream *out, + uint64_t poolid) const; + + void dump_pool_stats_full(const OSDMap &osd_map, stringstream *ss, Formatter *f, + bool verbose) const; + void dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const; + static void dump_object_stat_sum(TextTable &tbl, Formatter *f, + const object_stat_sum_t &sum, + uint64_t avail, + float raw_used_rate, + bool verbose, const pg_pool_t *pool); + + size_t get_num_pg_by_osd(int osd) const { + auto p = num_pg_by_osd.find(osd); + if (p == num_pg_by_osd.end()) + return 0; + else + return p->second.acting; + } + int get_num_primary_pg_by_osd(int osd) const { + auto p = num_pg_by_osd.find(osd); + if (p == num_pg_by_osd.end()) + return 0; + else + return p->second.primary; + } + + int64_t get_rule_avail(const OSDMap& osdmap, int ruleno) const; + + // kill me post-luminous: + virtual float get_fallback_full_ratio() const { + return .95; + } + + void encode(bufferlist& bl, uint64_t features) const; + void decode(bufferlist::iterator& p); + void dump(Formatter *f) const; + static void generate_test_instances(list& ls); +}; +WRITE_CLASS_ENCODER(PGMapDigest::pg_count); +WRITE_CLASS_ENCODER_FEATURES(PGMapDigest); + +class PGMap : public PGMapDigest { public: // the map version_t version; epoch_t last_osdmap_epoch; // last osdmap epoch i applied to the pgmap epoch_t last_pg_scan; // osdmap epoch ceph::unordered_map pg_stat; - ceph::unordered_map osd_stat; set full_osds; set nearfull_osds; float full_ratio; @@ -117,42 +262,12 @@ public: // aggregate stats (soft state), generated by calc_stats() - ceph::unordered_map num_pg_by_state; - int64_t num_pg = 0, num_osd = 0; - int64_t num_pg_active = 0; - ceph::unordered_map pg_pool_sum; - pool_stat_t pg_sum; - osd_stat_t osd_sum; mutable epoch_t min_last_epoch_clean = 0; - ceph::unordered_map blocked_by_sum; ceph::unordered_map > pg_by_osd; - ceph::unordered_map num_primary_pg_by_osd; + ceph::unordered_map blocked_by_sum; utime_t stamp; - // recent deltas, and summation - /** - * keep track of last deltas for each pool, calculated using - * @p pg_pool_sum as baseline. - */ - ceph::unordered_map > > per_pool_sum_deltas; - /** - * keep track of per-pool timestamp deltas, according to last update on - * each pool. - */ - ceph::unordered_map per_pool_sum_deltas_stamps; - /** - * keep track of sum deltas, per-pool, taking into account any previous - * deltas existing in @p per_pool_sum_deltas. The utime_t as second member - * of the pair is the timestamp refering to the last update (i.e., the first - * member of the pair) for a given pool. - */ - ceph::unordered_map > per_pool_sum_delta; - - list< pair > pg_sum_deltas; - pool_stat_t pg_sum_delta; - utime_t stamp_delta; - void update_global_delta(CephContext *cct, const utime_t ts, const pool_stat_t& pg_sum_old); void update_pool_deltas(CephContext *cct, @@ -184,8 +299,6 @@ public: epoch_t calc_min_last_epoch_clean() const; - int64_t get_rule_avail(const OSDMap& osdmap, int ruleno) const; - public: set creating_pgs; @@ -237,14 +350,6 @@ public: stamp = s; } - size_t get_num_pg_by_osd(int osd) const { - ceph::unordered_map >::const_iterator p = pg_by_osd.find(osd); - if (p == pg_by_osd.end()) - return 0; - else - return p->second.size(); - } - pool_stat_t get_pg_pool_sum_stat(int64_t pool) const { ceph::unordered_map::const_iterator p = pg_pool_sum.find(pool); @@ -253,14 +358,6 @@ public: return pool_stat_t(); } - int get_num_primary_pg_by_osd(int osd) const { - assert(osd >= 0); - int num = 0; - auto it = num_primary_pg_by_osd.find(osd); - if (it != num_primary_pg_by_osd.end()) - num = it->second; - return num; - } void update_pg(pg_t pgid, bufferlist& bl); void remove_pg(pg_t pgid); @@ -282,15 +379,12 @@ public: void encode(bufferlist &bl, uint64_t features=-1) const; void decode(bufferlist::iterator &bl); + /// encode subset of our data to a PGMapDigest + void encode_digest(bufferlist& bl, uint64_t features) const; + + void dirty_all(Incremental& inc); + void dump(Formatter *f) const; - void dump_pool_stats(const OSDMap &osd_map, stringstream *ss, Formatter *f, - bool verbose) const; - void dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const; - static void dump_object_stat_sum(TextTable &tbl, Formatter *f, - const object_stat_sum_t &sum, - uint64_t avail, - float raw_used_rate, - bool verbose, const pg_pool_t *pool); void dump_basic(Formatter *f) const; void dump_pg_stats(Formatter *f, bool brief) const; void dump_pool_stats(Formatter *f) const; @@ -327,58 +421,6 @@ public: void get_filtered_pg_stats(uint32_t state, int64_t poolid, int64_t osdid, bool primary, set& pgs) const; - void recovery_summary(Formatter *f, list *psl, - const pool_stat_t& delta_sum) const; - void overall_recovery_summary(Formatter *f, list *psl) const; - void pool_recovery_summary(Formatter *f, list *psl, - uint64_t poolid) const; - void recovery_rate_summary(Formatter *f, ostream *out, - const pool_stat_t& delta_sum, - utime_t delta_stamp) const; - void overall_recovery_rate_summary(Formatter *f, ostream *out) const; - void pool_recovery_rate_summary(Formatter *f, ostream *out, - uint64_t poolid) const; - /** - * Obtain a formatted/plain output for client I/O, source from stats for a - * given @p delta_sum pool over a given @p delta_stamp period of time. - */ - void client_io_rate_summary(Formatter *f, ostream *out, - const pool_stat_t& delta_sum, - utime_t delta_stamp) const; - /** - * Obtain a formatted/plain output for the overall client I/O, which is - * calculated resorting to @p pg_sum_delta and @p stamp_delta. - */ - void overall_client_io_rate_summary(Formatter *f, ostream *out) const; - /** - * Obtain a formatted/plain output for client I/O over a given pool - * with id @p pool_id. We will then obtain pool-specific data - * from @p per_pool_sum_delta. - */ - void pool_client_io_rate_summary(Formatter *f, ostream *out, - uint64_t poolid) const; - /** - * Obtain a formatted/plain output for cache tier IO, source from stats for a - * given @p delta_sum pool over a given @p delta_stamp period of time. - */ - void cache_io_rate_summary(Formatter *f, ostream *out, - const pool_stat_t& delta_sum, - utime_t delta_stamp) const; - /** - * Obtain a formatted/plain output for the overall cache tier IO, which is - * calculated resorting to @p pg_sum_delta and @p stamp_delta. - */ - void overall_cache_io_rate_summary(Formatter *f, ostream *out) const; - /** - * Obtain a formatted/plain output for cache tier IO over a given pool - * with id @p pool_id. We will then obtain pool-specific data - * from @p per_pool_sum_delta. - */ - void pool_cache_io_rate_summary(Formatter *f, ostream *out, - uint64_t poolid) const; - - void print_summary(Formatter *f, ostream *out) const; - void print_oneline_summary(Formatter *f, ostream *out) const; epoch_t get_min_last_epoch_clean() const { if (!min_last_epoch_clean) @@ -386,6 +428,13 @@ public: return min_last_epoch_clean; } + float get_fallback_full_ratio() const override { + if (full_ratio > 0) { + return full_ratio; + } + return .95; + } + static void generate_test_instances(list& o); }; WRITE_CLASS_ENCODER_FEATURES(PGMap::Incremental) diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index f9ae1d9fbe1..15e22daebe8 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -1741,7 +1741,7 @@ public: } void dump_pool_stats(const OSDMap& osdm, stringstream *ss, Formatter *f, bool verbose) const { - parent.dump_pool_stats(osdm, ss, f, verbose); + parent.dump_pool_stats_full(osdm, ss, f, verbose); } int process_pg_command(const string& prefix, diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index 77df9f0d7e6..1053e66fbc7 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -145,6 +145,7 @@ TYPE_FEATUREFUL(AuthMonitor::Incremental) #include "mon/PGMap.h" TYPE_FEATUREFUL(PGMap::Incremental) TYPE_FEATUREFUL_NONDETERMINISTIC(PGMap) +TYPE_FEATUREFUL_NONDETERMINISTIC(PGMapDigest) #include "mon/MonitorDBStore.h" TYPE(MonitorDBStore::Transaction)