From 2311789487ef5a5dcb01236c21bae7b233a74a85 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 17 Nov 2008 21:16:33 -0800 Subject: [PATCH] osd: maintain some per-pg stats (object counts, for now) We'll add disk usage soon. --- src/osd/OSD.cc | 4 ++-- src/osd/PG.cc | 17 +++++++++++++++-- src/osd/PG.h | 6 +----- src/osd/ReplicatedPG.cc | 38 ++++++++++++++++++++++++++++++++------ src/osd/ReplicatedPG.h | 2 +- src/osd/osd_types.h | 10 +++++++++- 6 files changed, 60 insertions(+), 17 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index ac565bb1a256f..5ca346769b64f 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1180,8 +1180,8 @@ void OSD::send_pg_stats() pg->pg_stats_lock.Lock(); if (pg->pg_stats_valid) { pg->pg_stats_valid = false; - m->pg_stat[pgid] = pg->pg_stats; - dout(30) << " sending " << pgid << " " << pg->pg_stats.state << dendl; + m->pg_stat[pgid] = pg->pg_stats_stable; + dout(30) << " sending " << pgid << " " << pg->pg_stats_stable.state << dendl; } pg->pg_stats_lock.Unlock(); } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index f67eb8bbdfde0..e76b74dc8412d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -457,6 +457,9 @@ void PG::generate_backlog() vector olist; osd->store->collection_list(info.pgid.to_coll(), olist); + if (olist.size() != pg_stats.num_objects) + dout(10) << " WARNING: " << olist.size() << " != num_objects " << pg_stats.num_objects << dendl; + int local = 0; map add; @@ -1327,12 +1330,11 @@ void PG::update_stats() pg_stats_lock.Lock(); if (is_primary()) { // update our stat summary + pg_stats_stable = pg_stats; pg_stats_valid = true; pg_stats.version = info.last_update; pg_stats.reported = osd->osdmap->get_epoch(); pg_stats.state = state; - pg_stats.num_bytes = stat_num_bytes; - pg_stats.num_kb = stat_num_kb; pg_stats.acting = acting; } else { pg_stats_valid = false; @@ -1369,6 +1371,10 @@ void PG::write_info(ObjectStore::Transaction& t) bufferlist ki; ::encode(past_intervals, ki); t.collection_setattr(info.pgid.to_coll(), "past_intervals", ki); + + bufferlist st; + ::encode(pg_stats, st); + t.collection_setattr(info.pgid.to_coll(), "stats", st); } void PG::write_log(ObjectStore::Transaction& t) @@ -1567,6 +1573,13 @@ void PG::read_state(ObjectStore *store) ::decode(past_intervals, p); } + bl.clear(); + store->collection_getattr(info.pgid.to_coll(), "stats", bl); + if (bl.length()) { + p = bl.begin(); + ::decode(pg_stats, p); + } + read_log(store); } diff --git a/src/osd/PG.h b/src/osd/PG.h index b7a60b97aa0bb..41962bdb5205a 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -602,14 +602,11 @@ protected: // stats - loff_t stat_num_bytes; - loff_t stat_num_kb; - hash_map stat_object_temp_rd; Mutex pg_stats_lock; bool pg_stats_valid; - pg_stat_t pg_stats; + pg_stat_t pg_stats, pg_stats_stable; void update_stats(); void clear_stats(); @@ -693,7 +690,6 @@ public: pending_snap_removal_item(this), have_master_log(true), must_notify_mon(false), - stat_num_bytes(0), stat_num_kb(0), pg_stats_lock("PG::pg_stats_lock"), pg_stats_valid(false), finish_sync_event(NULL) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 27def3f7322b0..a5cabc205bbc9 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -433,6 +433,8 @@ bool ReplicatedPG::snap_trimmer() coll_t c = info.pgid.to_snap_coll(sn); vector ls; osd->store->collection_list(c, ls); + if (ls.size() != pg_stats.num_objects) + dout(10) << " WARNING: " << ls.size() << " != num_objects " << pg_stats.num_objects << dendl; dout(10) << "snap_trimmer collection " << c << " has " << ls.size() << " items" << dendl; @@ -475,7 +477,7 @@ bool ReplicatedPG::snap_trimmer() t.collection_remove(info.pgid.to_snap_coll(snaps[0]), coid); if (snaps.size() > 1) t.collection_remove(info.pgid.to_snap_coll(snaps[snaps.size()-1]), coid); - + // ...from snapset snapid_t last = coid.oid.snap; vector::iterator p; @@ -490,6 +492,8 @@ bool ReplicatedPG::snap_trimmer() snapset.clones.erase(p); snapset.clone_overlap.erase(last); snapset.clone_size.erase(last); + pg_stats.num_objects--; + pg_stats.num_object_clones--; } else { // save adjusted snaps for this object dout(10) << coid << " snaps " << snaps << " -> " << newsnaps << dendl; @@ -514,6 +518,7 @@ bool ReplicatedPG::snap_trimmer() if (snapset.clones.empty() && !snapset.head_exists) { dout(10) << coid << " removing head " << head << dendl; t.remove(info.pgid.to_coll(), head); + pg_stats.num_objects--; } else { bl.clear(); ::encode(snapset, bl); @@ -847,6 +852,8 @@ void ReplicatedPG::prepare_clone(ObjectStore::Transaction& t, bufferlist& logbl, t.collection_add(lc, info.pgid.to_coll(), coid); } + pg_stats.num_objects++; + pg_stats.num_object_clones++; snapset.clones.push_back(coid.oid.snap); snapset.clone_size[coid.oid.snap] = old_size; snapset.clone_overlap[coid.oid.snap].insert(0, old_size); @@ -862,7 +869,7 @@ void ReplicatedPG::prepare_clone(ObjectStore::Transaction& t, bufferlist& logbl, // low level object operations int ReplicatedPG::prepare_simple_op(ObjectStore::Transaction& t, osd_reqid_t reqid, - pobject_t poid, __u64& old_size, + pobject_t poid, __u64& old_size, bool& exists, ceph_osd_op& op, bufferlist::iterator& bp, SnapSet& snapset, SnapContext& snapc) { @@ -995,6 +1002,10 @@ int ReplicatedPG::prepare_simple_op(ObjectStore::Transaction& t, osd_reqid_t req } old_size = 0; snapset.head_exists = false; + if (exists) { + pg_stats.num_objects--; + exists = false; + } } break; @@ -1034,7 +1045,7 @@ int ReplicatedPG::prepare_simple_op(ObjectStore::Transaction& t, osd_reqid_t req newop.op = CEPH_OSD_OP_WRITE; newop.offset = old_size; newop.length = op.length; - prepare_simple_op(t, reqid, poid, old_size, newop, bp, snapset, snapc); + prepare_simple_op(t, reqid, poid, old_size, exists, newop, bp, snapset, snapc); } break; @@ -1043,6 +1054,11 @@ int ReplicatedPG::prepare_simple_op(ObjectStore::Transaction& t, osd_reqid_t req return -EINVAL; } + if (!exists && snapset.head_exists) { + pg_stats.num_objects++; + exists = true; + } + return 0; } @@ -1061,8 +1077,11 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t struct stat st; int r = osd->store->stat(info.pgid.to_coll(), poid, &st); __u64 old_size = 0; - if (r == 0) + bool exists = false; + if (r == 0) { + exists = true; old_size = st.st_size; + } // apply ops bool did_snap = false; @@ -1071,10 +1090,12 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t // clone? if (!did_snap && poid.oid.snap && !ceph_osd_op_type_lock(ops[i].op)) { // is a (non-lock) modification - prepare_clone(t, log_bl, reqid, poid, old_size, old_version, at_version, snapset, snapc); + prepare_clone(t, log_bl, reqid, poid, old_size, old_version, at_version, + snapset, snapc); did_snap = true; } - prepare_simple_op(t, reqid, poid, old_size, ops[i], bp, + prepare_simple_op(t, reqid, poid, old_size, exists, + ops[i], bp, snapset, snapc); } @@ -2636,6 +2657,9 @@ void ReplicatedPG::clean_up_local(ObjectStore::Transaction& t) // be thorough. vector ls; osd->store->collection_list(info.pgid.to_coll(), ls); + if (ls.size() != pg_stats.num_objects) + dout(10) << " WARNING: " << ls.size() << " != num_objects " << pg_stats.num_objects << dendl; + set s; for (vector::iterator i = ls.begin(); @@ -2703,6 +2727,8 @@ void ReplicatedPG::scrub() coll_t c = info.pgid.to_coll(); vector ls; osd->store->collection_list(c, ls); + if (ls.size() != pg_stats.num_objects) + dout(10) << " WARNING: " << ls.size() << " != num_objects " << pg_stats.num_objects << dendl; dout(10) << "scrub " << ls.size() << " objects" << dendl; sort(ls.begin(), ls.end()); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index fcf441a3e8fdb..f2e6ccb64c5b8 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -122,7 +122,7 @@ protected: eversion_t old_version, eversion_t& at_version, SnapSet& snapset, SnapContext& snapc); int prepare_simple_op(ObjectStore::Transaction& t, osd_reqid_t reqid, - pobject_t poid, __u64& old_size, + pobject_t poid, __u64& old_size, bool& exists, ceph_osd_op& op, bufferlist::iterator& bp, SnapSet& snapset, SnapContext& snapc); void prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t reqid, diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 87d382d446024..690cef858c04b 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -335,9 +335,12 @@ struct pg_stat_t { pg_t parent; int32_t parent_split_bits; int32_t state; + int64_t num_bytes; // in bytes int64_t num_kb; // in KB int64_t num_objects; + int64_t num_object_clones; + vector acting; void encode(bufferlist &bl) const { @@ -350,6 +353,7 @@ struct pg_stat_t { ::encode(num_bytes, bl); ::encode(num_kb, bl); ::encode(num_objects, bl); + ::encode(num_object_clones, bl); ::encode(acting, bl); } void decode(bufferlist::iterator &bl) { @@ -362,9 +366,13 @@ struct pg_stat_t { ::decode(num_bytes, bl); ::decode(num_kb, bl); ::decode(num_objects, bl); + ::decode(num_object_clones, bl); ::decode(acting, bl); } - pg_stat_t() : reported(0), created(0), parent_split_bits(0), state(0), num_bytes(0), num_kb(0), num_objects(0) {} + pg_stat_t() : reported(0), created(0), parent_split_bits(0), + state(0), + num_bytes(0), num_kb(0), + num_objects(0), num_object_clones(0) {} }; WRITE_CLASS_ENCODER(pg_stat_t) -- 2.39.5