From 84a678ed50fecf434b413dbf0333e68f9ba384c4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 10 Jan 2008 13:58:52 -0800 Subject: [PATCH] statfs returns real data; osds report utilization properly; works in fuse client --- src/include/ceph_fs.h | 18 ++++++----- src/mon/PGMap.h | 71 +++++++++++++++++++++++++++++++++-------- src/mon/PGMonitor.cc | 36 ++++++++++++++------- src/osd/OSD.cc | 9 ++++-- src/osd/OSD.h | 1 + src/osd/PG.cc | 21 ++++++------ src/osd/PG.h | 4 +-- src/osd/ReplicatedPG.cc | 3 +- src/osd/osd_types.h | 4 +-- 9 files changed, 117 insertions(+), 50 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 99109b666cd87..7c4c645e61c6d 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -213,8 +213,6 @@ struct ceph_entity_addr { (a)->nonce == (b)->nonce && \ memcmp((a), (b), sizeof(*(a)) == 0)) - - struct ceph_entity_inst { struct ceph_entity_name name; struct ceph_entity_addr addr; @@ -269,11 +267,15 @@ struct ceph_msg_header { #define CEPH_MSG_OSD_OPREPLY 43 -/* mds states */ -#define CEPH_MDS_STATE_DNE 0 /* down, never existed. */ +/* + * mds states + * > 0 -> in + * <= 0 -> out + */ +#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */ #define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. empty log. */ -#define CEPH_MDS_STATE_DESTROYING -2 /* down, once existed, but no subtrees. empty log. */ -#define CEPH_MDS_STATE_FAILED 3 /* down, active subtrees needs to be recovered. */ +#define CEPH_MDS_STATE_DESTROYING -2 /* down, existing, semi-destroyed. */ +#define CEPH_MDS_STATE_FAILED 3 /* down, needs to be recovered. */ #define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. destiny unknown. */ #define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment by monitor. */ @@ -283,9 +285,9 @@ struct ceph_msg_header { #define CEPH_MDS_STATE_REPLAY 8 /* up, starting prior failed instance. scanning journal. 
*/ #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed operations (import, rename, etc.) */ #define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */ -#define CEPH_MDS_STATE_REJOIN 11 /* up, replayed journal, rejoining distributed cache */ +#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */ #define CEPH_MDS_STATE_ACTIVE 12 /* up, active */ -#define CEPH_MDS_STATE_STOPPING 13 /* up, exporting metadata (-> standby or out) */ +#define CEPH_MDS_STATE_STOPPING 13 /* up, exporting metadata */ /* client_session message op values */ diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index b915c28cbd755..54ebe5fc8539f 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -51,39 +51,78 @@ public: p != inc.pg_stat_updates.end(); ++p) { if (pg_stat.count(p->first)) - stat_sub(pg_stat[p->first]); + stat_pg_sub(pg_stat[p->first]); pg_stat[p->first] = p->second; - stat_add(p->second); + stat_pg_add(p->second); + } + for (map::iterator p = inc.osd_stat_updates.begin(); + p != inc.osd_stat_updates.end(); + ++p) { + if (osd_stat.count(p->first)) + stat_osd_sub(osd_stat[p->first]); + osd_stat[p->first] = p->second; + stat_osd_add(p->second); } } // aggregate stats (soft state) hash_map num_pg_by_state; int64_t num_pg; - int64_t total_size; - int64_t total_num_blocks; + int64_t total_pg_num_bytes; + int64_t total_pg_num_blocks; + int64_t total_pg_num_objects; + int64_t num_osd; + int64_t total_osd_num_blocks; + int64_t total_osd_num_blocks_avail; + int64_t total_osd_num_objects; void stat_zero() { num_pg = 0; num_pg_by_state.clear(); - total_size = 0; - total_num_blocks = 0; + total_pg_num_bytes = 0; + total_pg_num_blocks = 0; + total_pg_num_objects = 0; + num_osd = 0; + total_osd_num_blocks = 0; + total_osd_num_blocks_avail = 0; + total_osd_num_objects = 0; } - void stat_add(pg_stat_t &s) { + void stat_pg_add(pg_stat_t &s) { num_pg++; num_pg_by_state[s.state]++; - total_size += s.size; - total_num_blocks += s.num_blocks; + total_pg_num_bytes += 
s.num_bytes; + total_pg_num_blocks += s.num_blocks; + total_pg_num_objects += s.num_objects; + } + void stat_osd_add(osd_stat_t &s) { + num_osd++; + total_osd_num_blocks += s.num_blocks; + total_osd_num_blocks_avail += s.num_blocks_avail; + total_osd_num_objects += s.num_objects; } - void stat_sub(pg_stat_t &s) { + void stat_pg_sub(pg_stat_t &s) { num_pg--; num_pg_by_state[s.state]--; - total_size -= s.size; - total_num_blocks -= s.num_blocks; + total_pg_num_bytes -= s.num_bytes; + total_pg_num_blocks -= s.num_blocks; + total_pg_num_objects -= s.num_objects; + } + void stat_osd_sub(osd_stat_t &s) { + num_osd--; + total_osd_num_blocks -= s.num_blocks; + total_osd_num_blocks_avail -= s.num_blocks_avail; + total_osd_num_objects -= s.num_objects; } PGMap() : version(0), - num_pg(0), total_size(0), total_num_blocks(0) {} + num_pg(0), + total_pg_num_bytes(0), + total_pg_num_blocks(0), + total_pg_num_objects(0), + num_osd(0), + total_osd_num_blocks(0), + total_osd_num_blocks_avail(0), + total_osd_num_objects(0) {} void _encode(bufferlist &bl) { ::_encode(version, bl); @@ -96,7 +135,11 @@ public: for (hash_map::iterator p = pg_stat.begin(); p != pg_stat.end(); ++p) - stat_add(p->second); + stat_pg_add(p->second); + for (hash_map::iterator p = osd_stat.begin(); + p != osd_stat.end(); + ++p) + stat_osd_add(p->second); } }; diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 11c50fbc10ee9..77a39e3645b2c 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -189,14 +189,15 @@ void PGMonitor::handle_statfs(MStatfs *statfs) // fill out stfs MStatfsReply *reply = new MStatfsReply(statfs->tid); memset(&reply->stfs, 0, sizeof(reply->stfs)); - reply->stfs.f_bsize = 1024; - reply->stfs.f_frsize = 1024; - reply->stfs.f_blocks = 1024 * 1024; //pg_map.total_num_blocks; - reply->stfs.f_bfree = 1024 * 1024; - reply->stfs.f_bavail = 1024 * 1024; - reply->stfs.f_files = 1024 * 1024; - reply->stfs.f_ffree = 1024 * 1024; - reply->stfs.f_favail = 1024 * 1024; + // these 
are in KB: + reply->stfs.f_bsize = 4096; // fixme + reply->stfs.f_frsize = 4096; // what is this? + reply->stfs.f_blocks = pg_map.total_osd_num_blocks; // kb + reply->stfs.f_bfree = pg_map.total_osd_num_blocks_avail; + reply->stfs.f_bavail = pg_map.total_osd_num_blocks_avail; + reply->stfs.f_files = pg_map.total_osd_num_objects; + reply->stfs.f_ffree = -1; + reply->stfs.f_favail = -1; reply->stfs.f_namemax = 1024; #ifdef __CYGWIN__ reply->stfs.f_flag = 0; @@ -212,7 +213,20 @@ void PGMonitor::handle_statfs(MStatfs *statfs) bool PGMonitor::handle_pg_stats(MPGStats *stats) { dout(10) << "handle_pg_stats " << *stats << " from " << stats->get_source() << dendl; - + int from = stats->get_source().num(); + if (!stats->get_source().is_osd() || + !mon->osdmon->osdmap.is_up(from) || + stats->get_source_inst() != mon->osdmon->osdmap.get_inst(from)) { + dout(1) << " ignoring stats from non-active osd" << dendl; + } + + // osd stat + if (pg_map.osd_stat.count(from)) + pg_map.stat_osd_sub(pg_map.osd_stat[from]); + pg_map.osd_stat[from] = stats->osd_stat; + pg_map.stat_osd_add(stats->osd_stat); + + // pg stats for (map::iterator p = stats->pg_stat.begin(); p != stats->pg_stat.end(); p++) { @@ -236,9 +250,9 @@ bool PGMonitor::handle_pg_stats(MPGStats *stats) // we don't care about consistency; apply to live map. 
if (pg_map.pg_stat.count(pgid)) - pg_map.stat_sub(pg_map.pg_stat[pgid]); + pg_map.stat_pg_sub(pg_map.pg_stat[pgid]); pg_map.pg_stat[pgid] = p->second; - pg_map.stat_add(pg_map.pg_stat[pgid]); + pg_map.stat_pg_add(pg_map.pg_stat[pgid]); } delete stats; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index c639070c0cc37..be5244bbcaa99 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -823,14 +823,17 @@ void OSD::heartbeat() void OSD::send_pg_stats() { //dout(-10) << "send_pg_stats" << dendl; - + bool updated; + // grab queue set q; pg_stat_queue_lock.Lock(); q.swap(pg_stat_queue); + updated = osd_stat_updated; + osd_stat_updated = false; pg_stat_queue_lock.Unlock(); - - if (!q.empty()) { + + if (!q.empty() || updated) { dout(1) << "send_pg_stats - " << q.size() << " pgs updated" << dendl; MPGStats *m = new MPGStats; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 871657642ff0a..8c8980b434a45 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -297,6 +297,7 @@ private: // -- pg stats -- Mutex pg_stat_queue_lock; set pg_stat_queue; + bool osd_stat_updated; class C_Stats : public Context { OSD *osd; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 25a64a8ba101b..26201df53d352 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1018,19 +1018,22 @@ void PG::finish_recovery() void PG::update_stats() { dout(15) << "update_stats" << dendl; - assert(is_primary()); - // update our stat summary - pg_stats_lock.Lock(); - pg_stats.reported = info.last_update; - pg_stats.state = state; - pg_stats.size = stat_size; - pg_stats.num_blocks = stat_num_blocks; - pg_stats_lock.Unlock(); + if (is_primary()) { + // update our stat summary + pg_stats_lock.Lock(); + pg_stats.reported = info.last_update; + pg_stats.state = state; + pg_stats.num_bytes = stat_num_bytes; + pg_stats.num_blocks = stat_num_blocks; + pg_stats_lock.Unlock(); + } // put in osd stat_queue osd->pg_stat_queue_lock.Lock(); - osd->pg_stat_queue.insert(info.pgid); + if (is_primary()) + 
osd->pg_stat_queue.insert(info.pgid); + osd->osd_stat_updated = true; osd->pg_stat_queue_lock.Unlock(); } diff --git a/src/osd/PG.h b/src/osd/PG.h index b6dac4d65d2e4..eb07a6f5709f7 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -496,7 +496,7 @@ protected: // stats - off_t stat_size; + off_t stat_num_bytes; off_t stat_num_blocks; hash_map stat_object_temp_rd; @@ -573,7 +573,7 @@ public: state(0), last_epoch_started_any(0), have_master_log(true), - stat_size(0), stat_num_blocks(0) + stat_num_bytes(0), stat_num_blocks(0) { } virtual ~PG() { } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 128e99bf21d75..b5a50f369627e 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -828,7 +828,8 @@ void ReplicatedPG::apply_repop(RepGather *repop) break; } - + update_stats(); + } void ReplicatedPG::put_rep_gather(RepGather *repop) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 6809dd18b9424..44128fe713d29 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -228,11 +228,11 @@ struct pg_stat_t { eversion_t reported; int32_t state; - int64_t size; // in bytes + int64_t num_bytes; // in bytes int64_t num_blocks; // in 4k blocks int64_t num_objects; - pg_stat_t() : state(0), size(0), num_blocks(0), num_objects(0) {} + pg_stat_t() : state(0), num_bytes(0), num_blocks(0), num_objects(0) {} }; typedef struct ceph_osd_peer_stat osd_peer_stat_t; -- 2.39.5