From: Sage Weil Date: Mon, 8 Jul 2013 21:31:29 +0000 (-0700) Subject: osd: change pg_stat_t::reported from eversion_t to a pair of fields X-Git-Tag: v0.67-rc1~131^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8e075146f9d46bb4599acb892c749f8a0e248626;p=ceph.git osd: change pg_stat_t::reported from eversion_t to a pair of fields This rarely represents an actual eversion_t as the epoch and seq values are bumped semi-independently to ensure it is always unique. Break it into two separate fields to avoid confusion. Drop now-unused and slightly curious inc() method. Signed-off-by: Sage Weil --- diff --git a/src/messages/MPGStatsAck.h b/src/messages/MPGStatsAck.h index f268b68f9b8b..0b7b99e8aacc 100644 --- a/src/messages/MPGStatsAck.h +++ b/src/messages/MPGStatsAck.h @@ -19,7 +19,7 @@ class MPGStatsAck : public Message { public: - map pg_stat; + map > pg_stat; MPGStatsAck() : Message(MSG_PGSTATSACK) {} diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index c405b403f395..f477bc9ee65b 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -582,7 +582,7 @@ void PGMap::dump_pg_stats_plain(ostream& ss, << "\t" << pg_state_string(st.state) << "\t" << st.last_change << "\t" << st.version - << "\t" << st.reported + << "\t" << st.reported_epoch << ":" << st.reported_seq << "\t" << st.up << "\t" << st.acting << "\t" << st.last_scrub << "\t" << st.last_scrub_stamp diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index ead42a05dcae..6d103eb22333 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -675,7 +675,8 @@ bool PGMonitor::pg_stats_have_changed(int from, const MPGStats *stats) const hash_map::const_iterator t = pg_map.pg_stat.find(p->first); if (t == pg_map.pg_stat.end()) return true; - if (t->second.reported != p->second.reported) + if (t->second.reported_epoch != p->second.reported_epoch || + t->second.reported_seq != p->second.reported_seq) return true; } @@ -709,7 +710,7 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats) for (map::const_iterator p = stats->pg_stat.begin(); p != stats->pg_stat.end(); ++p) { - ack->pg_stat[p->first] = p->second.reported; + ack->pg_stat[p->first] = make_pair(p->second.reported_seq, p->second.reported_epoch); } mon->send_reply(stats, ack); stats->put(); @@ -731,22 +732,26 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats) p != stats->pg_stat.end(); ++p) { pg_t pgid = p->first; - ack->pg_stat[pgid] = p->second.reported; + ack->pg_stat[pgid] = make_pair(p->second.reported_seq, p->second.reported_epoch); - if ((pg_map.pg_stat.count(pgid) && - pg_map.pg_stat[pgid].reported > p->second.reported)) { - dout(15) << " had " << pgid << " from " << pg_map.pg_stat[pgid].reported << dendl; + if (pg_map.pg_stat.count(pgid) && + (pg_map.pg_stat[pgid].reported_seq > p->second.reported_seq || + pg_map.pg_stat[pgid].reported_epoch > p->second.reported_epoch)) { + dout(15) << " had " << pgid << " from " << pg_map.pg_stat[pgid].reported_epoch << ":" + << pg_map.pg_stat[pgid].reported_seq << dendl; continue; } if (pending_inc.pg_stat_updates.count(pgid) && - pending_inc.pg_stat_updates[pgid].reported > p->second.reported) { - dout(15) << " had " << pgid << " from " << pending_inc.pg_stat_updates[pgid].reported - << " (pending)" << dendl; + (pending_inc.pg_stat_updates[pgid].reported_seq > p->second.reported_seq || + pending_inc.pg_stat_updates[pgid].reported_epoch > p->second.reported_epoch)) { + dout(15) << " had " << pgid << " from " << pending_inc.pg_stat_updates[pgid].reported_epoch << ":" + << pending_inc.pg_stat_updates[pgid].reported_seq << " (pending)" << dendl; continue; } if (pg_map.pg_stat.count(pgid) == 0) { - dout(15) << " got " << pgid << " reported at " << p->second.reported + dout(15) << " got " << pgid << " reported at " << p->second.reported_epoch << ":" + << p->second.reported_seq << " state " << pg_state_string(p->second.state) << " but DNE in pg_map; pool was probably deleted." << dendl; @@ -754,7 +759,7 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats) } dout(15) << " got " << pgid - << " reported at " << p->second.reported + << " reported at " << p->second.reported_epoch << ":" << p->second.reported_seq << " state " << pg_state_string(pg_map.pg_stat[pgid].state) << " -> " << pg_state_string(p->second.state) << dendl; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 755078ce074c..1449eeac9136 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3655,9 +3655,11 @@ void OSD::send_pg_stats(const utime_t &now) pg->pg_stats_publish_lock.Lock(); if (pg->pg_stats_publish_valid) { m->pg_stat[pg->info.pgid] = pg->pg_stats_publish; - dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl; + dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch << ":" + << pg->pg_stats_publish.reported_seq << dendl; } else { - dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << ", not valid" << dendl; + dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch << ":" + << pg->pg_stats_publish.reported_seq << ", not valid" << dendl; } pg->pg_stats_publish_lock.Unlock(); } @@ -3697,19 +3699,22 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) ++p; if (ack->pg_stat.count(pg->info.pgid)) { - eversion_t acked = ack->pg_stat[pg->info.pgid]; + pair acked = ack->pg_stat[pg->info.pgid]; pg->pg_stats_publish_lock.Lock(); - if (acked == pg->pg_stats_publish.reported) { - dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl; + if (acked.first == pg->pg_stats_publish.reported_seq && + acked.second == pg->pg_stats_publish.reported_epoch) { + dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch + << ":" << pg->pg_stats_publish.reported_seq << dendl; pg->stat_queue_item.remove_myself(); pg->put("pg_stat_queue"); } else { - dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported - << " > acked " << acked << dendl; + dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch + << ":" << pg->pg_stats_publish.reported_seq << " > acked " << acked << dendl; } pg->pg_stats_publish_lock.Unlock(); } else { - dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl; + dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch + << ":" << pg->pg_stats_publish.reported_seq << dendl; } } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index dd636d9e7f93..acb20fc21c59 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1888,7 +1888,8 @@ void PG::publish_stats_to_osd() pg_stats_publish_lock.Lock(); if (is_primary()) { // update our stat summary - info.stats.reported.inc(get_osdmap()->get_epoch()); + info.stats.reported_epoch = get_osdmap()->get_epoch(); + ++info.stats.reported_seq; info.stats.version = info.last_update; info.stats.created = info.history.epoch_created; info.stats.last_scrub = info.history.last_scrub; @@ -1961,7 +1962,8 @@ void PG::publish_stats_to_osd() pg_stats_publish.stats.sum.num_objects_unfound = get_num_unfound(); } - dout(15) << "publish_stats_to_osd " << pg_stats_publish.reported << dendl; + dout(15) << "publish_stats_to_osd " << pg_stats_publish.reported_epoch + << ":" << pg_stats_publish.reported_seq << dendl; } else { pg_stats_publish_valid = false; dout(15) << "publish_stats_to_osd -- not primary" << dendl; @@ -5931,8 +5933,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap) } // if we haven't reported our PG stats in a long time, do so now. - if (pg->info.stats.reported.epoch + g_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) { - dout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - pg->info.stats.reported.epoch) + if (pg->info.stats.reported_epoch + g_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) { + dout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - pg->info.stats.reported_epoch) << " epochs" << dendl; pg->publish_stats_to_osd(); } diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index f1896f7c6d2b..6ba08362dadd 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -441,8 +441,11 @@ void PGLog::merge_log(ObjectStore::Transaction& t, changed = true; } - if (oinfo.stats.reported < info.stats.reported) // make sure reported always increases - oinfo.stats.reported = info.stats.reported; + if (oinfo.stats.reported_seq < info.stats.reported_seq || // make sure reported always increases + oinfo.stats.reported_epoch < info.stats.reported_epoch) { + oinfo.stats.reported_seq = info.stats.reported_seq; + oinfo.stats.reported_epoch = info.stats.reported_epoch; + } if (info.last_backfill.is_max()) info.stats = oinfo.stats; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index bfa3cdddaae6..2447199a7f05 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1110,7 +1110,8 @@ void object_stat_collection_t::generate_test_instances(listdump_stream("version") << version; - f->dump_stream("reported") << reported; + f->dump_stream("reported_seq") << reported_seq; + f->dump_stream("reported_epoch") << reported_epoch; f->dump_string("state", pg_state_string(state)); f->dump_stream("last_fresh") << last_fresh; f->dump_stream("last_change") << last_change; @@ -1148,7 +1149,8 @@ void pg_stat_t::encode(bufferlist &bl) const { ENCODE_START(13, 8, bl); ::encode(version, bl); - ::encode(reported, bl); + ::encode(reported_seq, bl); + ::encode(reported_epoch, bl); ::encode(state, bl); ::encode(log_start, bl); ::encode(ondisk_log_start, bl); @@ -1181,7 +1183,8 @@ void pg_stat_t::decode(bufferlist::iterator &bl) { DECODE_START_LEGACY_COMPAT_LEN(13, 8, 8, bl); ::decode(version, bl); - ::decode(reported, bl); + ::decode(reported_seq, bl); + ::decode(reported_epoch, bl); ::decode(state, bl); ::decode(log_start, bl); ::decode(ondisk_log_start, bl); @@ -1267,7 +1270,8 @@ void pg_stat_t::generate_test_instances(list& o) o.push_back(new pg_stat_t(a)); a.version = eversion_t(1, 3); - a.reported = eversion_t(1, 2); + a.reported_epoch = 1; + a.reported_seq = 2; a.state = 123; a.mapping_epoch = 998; a.last_fresh = utime_t(1002, 1); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 8e30bb14e1dd..ae7ec78e2b9f 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -461,12 +461,6 @@ public: return c; } - void inc(epoch_t e) { - if (epoch < e) - epoch = e; - version++; - } - string get_key_name() const; void encode(bufferlist &bl) const { @@ -905,7 +899,8 @@ WRITE_CLASS_ENCODER(object_stat_collection_t) */ struct pg_stat_t { eversion_t version; - eversion_t reported; + version_t reported_seq; // sequence number + epoch_t reported_epoch; // epoch of this report __u32 state; utime_t last_fresh; // last reported utime_t last_change; // new state != previous state @@ -939,7 +934,9 @@ struct pg_stat_t { utime_t last_became_active; pg_stat_t() - : state(0), + : reported_seq(0), + reported_epoch(0), + state(0), created(0), last_epoch_clean(0), parent_split_bits(0), stats_invalid(false), @@ -951,7 +948,7 @@ struct pg_stat_t { if (state & PG_STATE_CLEAN) { // we are clean as of this report, and should thus take the // reported epoch - return reported.epoch; + return reported_epoch; } else { return last_epoch_clean; } diff --git a/src/test/mon/test_mon_workloadgen.cc b/src/test/mon/test_mon_workloadgen.cc index 07f999180a39..e5bde68b6311 100644 --- a/src/test/mon/test_mon_workloadgen.cc +++ b/src/test/mon/test_mon_workloadgen.cc @@ -577,7 +577,7 @@ class OSDStub : public TestStub if (s.state & PG_STATE_CLEAN) s.last_clean = now; s.last_change = now; - s.reported.inc(1); + s.reported_seq++; pgs_changes.insert(pgid); }