encode(service_map, pending_service_map_bl, CEPH_FEATURES_ALL);
}
+void MgrStatMonitor::calc_pool_availability()
+{
+ dout(20) << __func__ << dendl;
+ auto pool_avail_end = pool_availability.end();
+ for (const auto& i : digest.pool_pg_unavailable_map) {
+ const auto& poolid = i.first;
+ if (pool_availability.find(poolid) == pool_avail_end){
+ // New Pool so we add.
+ pool_availability.insert({poolid, PoolAvailability()});
+ dout(20) << __func__ << "Adding pool: " << poolid << dendl;
+ }
+ }
+ utime_t now(ceph_clock_now());
+ auto pool_unavail_end = digest.pool_pg_unavailable_map.end();
+ for (const auto& i : pool_availability) {
+ const auto& poolid = i.first;
+ if (digest.pool_pg_unavailable_map.find(poolid) ==
+ pool_unavail_end) {
+ // delete none exist pool
+ pool_availability.erase(poolid);
+ dout(20) << __func__ << "Deleting pool: " << poolid << dendl;
+ continue;
+ }
+ if (mon.osdmon()->osdmap.have_pg_pool(poolid)){
+ // Currently, couldn't find an elegant way to get pool name
+ pool_availability[poolid].pool_name = mon.osdmon()->osdmap.get_pool_name(poolid);
+ } else {
+ pool_availability.erase(poolid);
+ dout(20) << __func__ << "pool: "
+ << poolid << " no longer exists in osdmap! Deleting pool: "
+ << poolid << dendl;
+ continue;
+ }
+ if (pool_availability[poolid].is_avail) {
+ if (!digest.pool_pg_unavailable_map[poolid].empty()) {
+ // avail to unavail
+ dout(20) << __func__
+ << ": Pool " << poolid << " status: Available to Unavailable" << dendl;
+ pool_availability[poolid].is_avail = false;
+ pool_availability[poolid].num_failures += 1;
+ pool_availability[poolid].last_downtime = now;
+ pool_availability[poolid].uptime +=
+ now - pool_availability[poolid].last_uptime;
+ } else {
+ // avail to avail
+ dout(20) << __func__
+ << ": Pool " << poolid << " status: Available to Available" << dendl;
+ pool_availability[poolid].uptime +=
+ now - pool_availability[poolid].last_uptime;
+ pool_availability[poolid].last_uptime = now;
+ }
+ } else {
+ if (!digest.pool_pg_unavailable_map[poolid].empty()) {
+ // unavail to unavail
+ dout(20) << __func__
+ << ": Pool " << poolid << " status: Unavailable to Unavailable" << dendl;
+ pool_availability[poolid].downtime +=
+ now - pool_availability[poolid].last_downtime;
+ pool_availability[poolid].last_downtime = now;
+ } else {
+ // unavail to avail
+ dout(20) << __func__
+ << ": Pool " << poolid << " status: Unavailable to Available" << dendl;
+ pool_availability[poolid].is_avail = true;
+ pool_availability[poolid].last_uptime = now;
+ pool_availability[poolid].uptime +=
+ now - pool_availability[poolid].last_downtime;
+ }
+ }
+ }
+ pending_pool_availability.swap(pool_availability);
+}
+
void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)
{
version = get_last_committed();
if (!p.end()) {
decode(progress_events, p);
}
+ if (!p.end()) {
+ decode(pool_availability, p);
+ }
dout(10) << __func__ << " v" << version
<< " service_map e" << service_map.epoch
<< " " << progress_events.size() << " progress events"
+ << " " << pool_availability.size() << " pools availability tracked"
<< dendl;
}
catch (ceph::buffer::error& e) {
check_subs();
update_logger();
mon.osdmon()->notify_new_pg_digest();
+ calc_pool_availability();
}
void MgrStatMonitor::update_logger()
ceph_assert(pending_service_map_bl.length());
bl.append(pending_service_map_bl);
encode(pending_progress_events, bl);
+ encode(pending_pool_availability, bl);
put_version(t, version, bl);
put_last_committed(t, version);
jf.close_section();
jf.flush(*_dout);
*_dout << dendl;
+ dout(20) << "pool_availability:\n";
+ JSONFormatter jf(true);
+ jf.open_object_section("pool_availability");
+ for (auto& i : pending_pool_availability) {
+ jf.dump_object(std::to_string(i.first), i.second);
+ }
+ jf.close_section();
+ jf.flush(*_dout);
+ *_dout << dendl;
return true;
}
PGMapDigest digest;
ServiceMap service_map;
std::map<std::string,ProgressEvent> progress_events;
+ std::map<uint64_t, PoolAvailability> pool_availability;
// pending commit
PGMapDigest pending_digest;
health_check_map_t pending_health_checks;
std::map<std::string,ProgressEvent> pending_progress_events;
ceph::buffer::list pending_service_map_bl;
+ std::map<uint64_t, PoolAvailability> pending_pool_availability;
public:
MgrStatMonitor(Monitor &mn, Paxos &p, const std::string& service_name);
bool preprocess_getpoolstats(MonOpRequestRef op);
bool preprocess_statfs(MonOpRequestRef op);
+ void calc_pool_availability();
+
void check_sub(Subscription *sub);
void check_subs();
void send_digests();
return digest;
}
+ const std::map<uint64_t, PoolAvailability>& get_pool_availability() {
+ return pool_availability;
+ }
+
ceph_statfs get_statfs(OSDMap& osdmap,
std::optional<int64_t> data_pool) const {
return digest.get_statfs(osdmap, data_pool);
using ceph::Formatter;
using ceph::JSONFormatter;
using ceph::make_message;
+using ceph::make_timespan;
+using ceph::timespan_str;
using namespace std::literals;
#define dout_subsys ceph_subsys_mon
wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs,
get_last_committed() + 1));
return true;
} else if (prefix == "osd pool availability-status") {
    // Render per-pool availability statistics (uptime, downtime, failure
    // count, MTBF, MTTR and a derived availability score) as a text table.
    TextTable tbl;
    tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT);
    tbl.define_column("UPTIME", TextTable::LEFT, TextTable::RIGHT);
    tbl.define_column("DOWNTIME", TextTable::LEFT, TextTable::RIGHT);
    tbl.define_column("NUMFAILURES", TextTable::LEFT, TextTable::RIGHT);
    tbl.define_column("MTBF", TextTable::LEFT, TextTable::RIGHT);
    tbl.define_column("MTTR", TextTable::LEFT, TextTable::RIGHT);
    tbl.define_column("SCORE", TextTable::LEFT, TextTable::RIGHT);
    tbl.define_column("AVAILABLE", TextTable::LEFT, TextTable::RIGHT);
    // get_pool_availability() returns a const reference — bind to it
    // instead of copying the whole map for a read-only report.
    const auto& pool_availability = mon.mgrstatmon()->get_pool_availability();
    for (const auto& i : pool_availability) {
      const auto& p = i.second;
      // Use floating-point division; uint64_t integer division would
      // truncate the mean times to whole seconds before display.
      double mtbf = p.num_failures > 0 ?
        static_cast<double>(p.uptime) / p.num_failures : 0;
      double mttr = p.num_failures > 0 ?
        static_cast<double>(p.downtime) / p.num_failures : 0;
      double score = mtbf > 0 ? mtbf / (mtbf + mttr) : 1.0;
      tbl << p.pool_name;
      tbl << timespan_str(make_timespan(p.uptime));
      tbl << timespan_str(make_timespan(p.downtime));
      tbl << p.num_failures;
      tbl << timespan_str(make_timespan(mtbf));
      tbl << timespan_str(make_timespan(mttr));
      tbl << score;
      tbl << p.is_avail;
      tbl << TextTable::endrow;
    }
    rdata.append(stringify(tbl));
} else if (prefix == "osd force-create-pg") {
pg_t pgid;
string pgidstr;
void PGMapDigest::encode(bufferlist& bl, uint64_t features) const
{
// NOTE: see PGMap::encode_digest
- uint8_t v = 4;
+ uint8_t v = 5;
assert(HAVE_FEATURE(features, SERVER_NAUTILUS));
ENCODE_START(v, 1, bl);
encode(num_pg, bl);
encode(avail_space_by_rule, bl);
encode(purged_snaps, bl);
encode(osd_sum_by_class, bl, features);
+ encode(pool_pg_unavailable_map, bl);
ENCODE_FINISH(bl);
}
void PGMapDigest::decode(bufferlist::const_iterator& p)
{
- DECODE_START(4, p);
+ DECODE_START(5, p);
assert(struct_v >= 4);
decode(num_pg, p);
decode(num_pg_active, p);
decode(avail_space_by_rule, p);
decode(purged_snaps, p);
decode(osd_sum_by_class, p);
+ if (struct_v >= 5) {
+ decode(pool_pg_unavailable_map, p);
+ }
DECODE_FINISH(p);
}
f->close_section();
}
f->close_section();
+ f->open_array_section("pool_pg_unavailable_map");
+ for (auto& p : pool_pg_unavailable_map) {
+ f->open_object_section("pool_pg_unavailable_map");
+ f->dump_string("poolid", std::to_string(p.first));
+ f->open_array_section("pgs");
+ for (const auto& pg : p.second) {
+ f->dump_stream("pg") << pg;
+ }
+ f->close_section();
+ f->close_section();
+ }
+ f->close_section();
f->open_array_section("num_pg_by_osd");
for (auto& p : num_pg_by_osd) {
f->open_object_section("count");
last_pg_scan = inc.pg_scan;
}
+/*
+ Returns a map of all pools in a cluster. Each value lists any PGs that
+ are in any of the following states:
+ - non-active
+ - stale
+
+ Eg: {1=[1.0],2=[],3=[]}
+ Here the cluster has 3 pools with id 1,2,3 and pool 1 has an inactive PG 1.0
+*/
+void PGMap::get_unavailable_pg_in_pool_map(const OSDMap& osdmap)
+{
+ dout(20) << __func__ << dendl;
+ pool_pg_unavailable_map.clear();
+ utime_t now(ceph_clock_now());
+ utime_t cutoff = now - utime_t(g_conf().get_val<int64_t>("mon_pg_stuck_threshold"), 0);
+ for (auto i = pg_stat.begin();
+ i != pg_stat.end();
+ ++i) {
+ const auto poolid = i->first.pool();
+ pool_pg_unavailable_map[poolid];
+ utime_t val = cutoff;
+
+ if (!(i->second.state & PG_STATE_ACTIVE)) { // This case covers unknown state since unknow state bit == 0;
+ if (i->second.last_active < val)
+ val = i->second.last_active;
+ }
+
+ if (i->second.state & PG_STATE_STALE) {
+ if (i->second.last_unstale < val)
+ val = i->second.last_unstale;
+ }
+
+ if (val < cutoff) {
+ pool_pg_unavailable_map[poolid].push_back(i->first);
+ dout(20) << "pool: " << poolid << " pg: " << i->first
+ << " is stuck unavailable" << " state: " << i->second.state << dendl;
+ }
+ }
+}
+
void PGMap::calc_stats()
{
num_pg = 0;
get_rules_avail(osdmap, &avail_space_by_rule);
calc_osd_sum_by_class(osdmap);
calc_purged_snaps();
+ get_unavailable_pg_in_pool_map(osdmap);
PGMapDigest::encode(bl, features);
}
#include "common/Formatter.h"
#include "osd/osd_types.h"
#include "include/mempool.h"
+#include "mon/health_check.h"
+#include <sstream>
+#include "mon/mon_types.h"
#include <cstdint>
#include <iosfwd>
osd_stat_t osd_sum;
mempool::pgmap::map<std::string,osd_stat_t> osd_sum_by_class;
mempool::pgmap::unordered_map<uint64_t,int32_t> num_pg_by_state;
+ mempool::pgmap::map<uint64_t,std::vector<pg_t>> pool_pg_unavailable_map;
struct pg_count {
int32_t acting = 0;
int32_t up_not_acting = 0;
void apply_incremental(CephContext *cct, const Incremental& inc);
void calc_stats();
+ void get_unavailable_pg_in_pool_map(const OSDMap& osdmap);
void stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
bool sameosds=false);
bool stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
#include "common/bit_str.h"
#include "common/ceph_releases.h"
#include "msg/msg_types.h" // for entity_addrvec_t
+#include "common/Clock.h"
// use as paxos_service index
enum {
};
WRITE_CLASS_ENCODER(ProgressEvent)
+struct PoolAvailability {
+ std::string pool_name = "";
+ utime_t started_at = ceph_clock_now();
+ uint64_t uptime = 0;
+ utime_t last_uptime = ceph_clock_now();
+ uint64_t downtime = 0;
+ utime_t last_downtime = ceph_clock_now();
+ uint64_t num_failures = 0;
+ bool is_avail = true;
+
+ PoolAvailability() {}
+
+ void dump(ceph::Formatter *f) const {
+ ceph_assert(f != NULL);
+ f->dump_stream("pool_name") << pool_name;
+ f->dump_stream("started_at") << started_at;
+ f->dump_int("uptime", uptime);
+ f->dump_stream("last_uptime") << last_uptime;
+ f->dump_int("downtime", downtime);
+ f->dump_stream("last_downtime") << last_downtime;
+ f->dump_int("num_failures", num_failures);
+ f->dump_bool("is_avail", is_avail);
+ }
+
+ void encode(ceph::buffer::list &bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(pool_name, bl);
+ encode(started_at, bl);
+ encode(uptime, bl);
+ encode(last_uptime, bl);
+ encode(downtime, bl);
+ encode(last_downtime, bl);
+ encode(num_failures, bl);
+ encode(is_avail, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(ceph::buffer::list::const_iterator &p) {
+ DECODE_START(1, p);
+ decode(pool_name, p);
+ decode(started_at, p);
+ decode(uptime, p);
+ decode(last_uptime, p);
+ decode(downtime, p);
+ decode(last_downtime, p);
+ decode(num_failures, p);
+ decode(is_avail, p);
+ DECODE_FINISH(p);
+ }
+
+ static void generate_test_instances(std::list<PoolAvailability*>& o) {
+ o.push_back(new PoolAvailability);
+ o.back()->started_at = utime_t(123, 456);
+ o.back()->last_uptime = utime_t(123, 456);
+ o.back()->last_downtime = utime_t(123, 456);
+ o.push_back(new PoolAvailability);
+ o.back()->pool_name = "foo";
+ o.back()->started_at = utime_t(123, 456);
+ o.back()->uptime = 100;
+ o.back()->last_uptime = utime_t(123, 456);
+ o.back()->downtime = 15;
+ o.back()->last_downtime = utime_t(123, 456);
+ o.back()->num_failures = 2;
+ o.back()->is_avail = true;
+ }
+};
+WRITE_CLASS_ENCODER(PoolAvailability)
+
#endif
TYPE_FEATUREFUL(DataStats)
TYPE_FEATUREFUL(ProgressEvent)
TYPE(FeatureMap)
+TYPE(PoolAvailability)
#include "mon/CreatingPGs.h"
TYPE_FEATUREFUL(creating_pgs_t)