]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: change pg_stat_t::reported from eversion_t to a pair of fields
authorSage Weil <sage@inktank.com>
Mon, 8 Jul 2013 21:31:29 +0000 (14:31 -0700)
committerSage Weil <sage@inktank.com>
Tue, 9 Jul 2013 20:42:34 +0000 (13:42 -0700)
This rarely represents an actual eversion_t as the epoch and seq values are
bumped semi-independently to ensure it is always unique.  Break it into
two separate fields to avoid confusion.

Drop now-unused and slightly curious inc() method.

Signed-off-by: Sage Weil <sage@inktank.com>
src/messages/MPGStatsAck.h
src/mon/PGMap.cc
src/mon/PGMonitor.cc
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PGLog.cc
src/osd/osd_types.cc
src/osd/osd_types.h
src/test/mon/test_mon_workloadgen.cc

index f268b68f9b8b33255f0071ca755b3e8d2b861824..0b7b99e8aacc11aff3b70e12af9e249d3dc518d3 100644 (file)
@@ -19,7 +19,7 @@
 
 class MPGStatsAck : public Message {
 public:
-  map<pg_t,eversion_t> pg_stat;
+  map<pg_t,pair<version_t,epoch_t> > pg_stat;
   
   MPGStatsAck() : Message(MSG_PGSTATSACK) {}
 
index c405b403f3951c8fa3e1a1cdb989e7af9d2d6892..f477bc9ee65baa3629e04b5a8c02c1c38cd53a3e 100644 (file)
@@ -582,7 +582,7 @@ void PGMap::dump_pg_stats_plain(ostream& ss,
        << "\t" << pg_state_string(st.state)
        << "\t" << st.last_change
        << "\t" << st.version
-       << "\t" << st.reported
+       << "\t" << st.reported_epoch << ":" << st.reported_seq
        << "\t" << st.up
        << "\t" << st.acting
        << "\t" << st.last_scrub << "\t" << st.last_scrub_stamp
index ead42a05dcae181c883078069eef2f02e9e2bc04..6d103eb22333630dc44931ee256084c8b36b51b0 100644 (file)
@@ -675,7 +675,8 @@ bool PGMonitor::pg_stats_have_changed(int from, const MPGStats *stats) const
     hash_map<pg_t,pg_stat_t>::const_iterator t = pg_map.pg_stat.find(p->first);
     if (t == pg_map.pg_stat.end())
       return true;
-    if (t->second.reported != p->second.reported)
+    if (t->second.reported_epoch != p->second.reported_epoch ||
+       t->second.reported_seq != p->second.reported_seq)
       return true;
   }
 
@@ -709,7 +710,7 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats)
     for (map<pg_t,pg_stat_t>::const_iterator p = stats->pg_stat.begin();
         p != stats->pg_stat.end();
         ++p) {
-      ack->pg_stat[p->first] = p->second.reported;
+      ack->pg_stat[p->first] = make_pair(p->second.reported_seq, p->second.reported_epoch);
     }
     mon->send_reply(stats, ack);
     stats->put();
@@ -731,22 +732,26 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats)
        p != stats->pg_stat.end();
        ++p) {
     pg_t pgid = p->first;
-    ack->pg_stat[pgid] = p->second.reported;
+    ack->pg_stat[pgid] = make_pair(p->second.reported_seq, p->second.reported_epoch);
 
-    if ((pg_map.pg_stat.count(pgid) && 
-        pg_map.pg_stat[pgid].reported > p->second.reported)) {
-      dout(15) << " had " << pgid << " from " << pg_map.pg_stat[pgid].reported << dendl;
+    if (pg_map.pg_stat.count(pgid) &&
+       (pg_map.pg_stat[pgid].reported_seq > p->second.reported_seq ||
+        pg_map.pg_stat[pgid].reported_epoch > p->second.reported_epoch)) {
+      dout(15) << " had " << pgid << " from " << pg_map.pg_stat[pgid].reported_epoch << ":"
+              << pg_map.pg_stat[pgid].reported_seq << dendl;
       continue;
     }
     if (pending_inc.pg_stat_updates.count(pgid) && 
-       pending_inc.pg_stat_updates[pgid].reported > p->second.reported) {
-      dout(15) << " had " << pgid << " from " << pending_inc.pg_stat_updates[pgid].reported
-              << " (pending)" << dendl;
+       (pending_inc.pg_stat_updates[pgid].reported_seq > p->second.reported_seq ||
+        pending_inc.pg_stat_updates[pgid].reported_epoch > p->second.reported_epoch)) {
+      dout(15) << " had " << pgid << " from " << pending_inc.pg_stat_updates[pgid].reported_epoch << ":"
+              << pending_inc.pg_stat_updates[pgid].reported_seq << " (pending)" << dendl;
       continue;
     }
 
     if (pg_map.pg_stat.count(pgid) == 0) {
-      dout(15) << " got " << pgid << " reported at " << p->second.reported
+      dout(15) << " got " << pgid << " reported at " << p->second.reported_epoch << ":"
+              << p->second.reported_seq
               << " state " << pg_state_string(p->second.state)
               << " but DNE in pg_map; pool was probably deleted."
               << dendl;
@@ -754,7 +759,7 @@ bool PGMonitor::prepare_pg_stats(MPGStats *stats)
     }
       
     dout(15) << " got " << pgid
-            << " reported at " << p->second.reported
+            << " reported at " << p->second.reported_epoch << ":" << p->second.reported_seq
             << " state " << pg_state_string(pg_map.pg_stat[pgid].state)
             << " -> " << pg_state_string(p->second.state)
             << dendl;
index 755078ce074c470844c1ac8277af7b06092d9b73..1449eeac9136e83e49909fbf74e8e0629f76946e 100644 (file)
@@ -3655,9 +3655,11 @@ void OSD::send_pg_stats(const utime_t &now)
       pg->pg_stats_publish_lock.Lock();
       if (pg->pg_stats_publish_valid) {
        m->pg_stat[pg->info.pgid] = pg->pg_stats_publish;
-       dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl;
+       dout(25) << " sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch << ":"
+                << pg->pg_stats_publish.reported_seq << dendl;
       } else {
-       dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << ", not valid" << dendl;
+       dout(25) << " NOT sending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch << ":"
+                << pg->pg_stats_publish.reported_seq << ", not valid" << dendl;
       }
       pg->pg_stats_publish_lock.Unlock();
     }
@@ -3697,19 +3699,22 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack)
     ++p;
 
     if (ack->pg_stat.count(pg->info.pgid)) {
-      eversion_t acked = ack->pg_stat[pg->info.pgid];
+      pair<version_t,epoch_t> acked = ack->pg_stat[pg->info.pgid];
       pg->pg_stats_publish_lock.Lock();
-      if (acked == pg->pg_stats_publish.reported) {
-       dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl;
+      if (acked.first == pg->pg_stats_publish.reported_seq &&
+         acked.second == pg->pg_stats_publish.reported_epoch) {
+       dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch
+                << ":" << pg->pg_stats_publish.reported_seq << dendl;
        pg->stat_queue_item.remove_myself();
        pg->put("pg_stat_queue");
       } else {
-       dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported
-                << " > acked " << acked << dendl;
+       dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch
+                << ":" << pg->pg_stats_publish.reported_seq << " > acked " << acked << dendl;
       }
       pg->pg_stats_publish_lock.Unlock();
     } else {
-      dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported << dendl;
+      dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_publish.reported_epoch
+              << ":" << pg->pg_stats_publish.reported_seq << dendl;
     }
   }
   
index dd636d9e7f93abac252e257aa9ab06e9d70c7416..acb20fc21c5960e25b9a34df99182ca95c125c99 100644 (file)
@@ -1888,7 +1888,8 @@ void PG::publish_stats_to_osd()
   pg_stats_publish_lock.Lock();
   if (is_primary()) {
     // update our stat summary
-    info.stats.reported.inc(get_osdmap()->get_epoch());
+    info.stats.reported_epoch = get_osdmap()->get_epoch();
+    ++info.stats.reported_seq;
     info.stats.version = info.last_update;
     info.stats.created = info.history.epoch_created;
     info.stats.last_scrub = info.history.last_scrub;
@@ -1961,7 +1962,8 @@ void PG::publish_stats_to_osd()
       pg_stats_publish.stats.sum.num_objects_unfound = get_num_unfound();
     }
 
-    dout(15) << "publish_stats_to_osd " << pg_stats_publish.reported << dendl;
+    dout(15) << "publish_stats_to_osd " << pg_stats_publish.reported_epoch
+            << ":" << pg_stats_publish.reported_seq << dendl;
   } else {
     pg_stats_publish_valid = false;
     dout(15) << "publish_stats_to_osd -- not primary" << dendl;
@@ -5931,8 +5933,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap)
   }
 
   // if we haven't reported our PG stats in a long time, do so now.
-  if (pg->info.stats.reported.epoch + g_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) {
-    dout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - pg->info.stats.reported.epoch)
+  if (pg->info.stats.reported_epoch + g_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) {
+    dout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - pg->info.stats.reported_epoch)
             << " epochs" << dendl;
     pg->publish_stats_to_osd();
   }
index f1896f7c6d2b16af82c786eda1de1443272fc050..6ba08362dadde6e1fd26172b70c4ff142300476d 100644 (file)
@@ -441,8 +441,11 @@ void PGLog::merge_log(ObjectStore::Transaction& t,
     changed = true;
   }
 
-  if (oinfo.stats.reported < info.stats.reported)   // make sure reported always increases
-    oinfo.stats.reported = info.stats.reported;
+  if (oinfo.stats.reported_seq < info.stats.reported_seq ||   // make sure reported always increases
+      oinfo.stats.reported_epoch < info.stats.reported_epoch) {
+    oinfo.stats.reported_seq = info.stats.reported_seq;
+    oinfo.stats.reported_epoch = info.stats.reported_epoch;
+  }
   if (info.last_backfill.is_max())
     info.stats = oinfo.stats;
 
index bfa3cdddaae63a1c12a01880e83798a90ce03848..2447199a7f05d2818cd3bfa8b6bc35fc242d9920 100644 (file)
@@ -1110,7 +1110,8 @@ void object_stat_collection_t::generate_test_instances(list<object_stat_collecti
 void pg_stat_t::dump(Formatter *f) const
 {
   f->dump_stream("version") << version;
-  f->dump_stream("reported") << reported;
+  f->dump_stream("reported_seq") << reported_seq;
+  f->dump_stream("reported_epoch") << reported_epoch;
   f->dump_string("state", pg_state_string(state));
   f->dump_stream("last_fresh") << last_fresh;
   f->dump_stream("last_change") << last_change;
@@ -1148,7 +1149,8 @@ void pg_stat_t::encode(bufferlist &bl) const
 {
   ENCODE_START(13, 8, bl);
   ::encode(version, bl);
-  ::encode(reported, bl);
+  ::encode(reported_seq, bl);
+  ::encode(reported_epoch, bl);
   ::encode(state, bl);
   ::encode(log_start, bl);
   ::encode(ondisk_log_start, bl);
@@ -1181,7 +1183,8 @@ void pg_stat_t::decode(bufferlist::iterator &bl)
 {
   DECODE_START_LEGACY_COMPAT_LEN(13, 8, 8, bl);
   ::decode(version, bl);
-  ::decode(reported, bl);
+  ::decode(reported_seq, bl);
+  ::decode(reported_epoch, bl);
   ::decode(state, bl);
   ::decode(log_start, bl);
   ::decode(ondisk_log_start, bl);
@@ -1267,7 +1270,8 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
   o.push_back(new pg_stat_t(a));
 
   a.version = eversion_t(1, 3);
-  a.reported = eversion_t(1, 2);
+  a.reported_epoch = 1;
+  a.reported_seq = 2;
   a.state = 123;
   a.mapping_epoch = 998;
   a.last_fresh = utime_t(1002, 1);
index 8e30bb14e1dde983a3f915d613c8fb6acca881d7..ae7ec78e2b9ffeba44f63732a53e76548f4c8060 100644 (file)
@@ -461,12 +461,6 @@ public:
     return c;
   }
 
-  void inc(epoch_t e) {
-    if (epoch < e)
-      epoch = e;
-    version++;
-  }
-
   string get_key_name() const;
 
   void encode(bufferlist &bl) const {
@@ -905,7 +899,8 @@ WRITE_CLASS_ENCODER(object_stat_collection_t)
  */
 struct pg_stat_t {
   eversion_t version;
-  eversion_t reported;
+  version_t reported_seq;  // sequence number
+  epoch_t reported_epoch;  // epoch of this report
   __u32 state;
   utime_t last_fresh;   // last reported
   utime_t last_change;  // new state != previous state
@@ -939,7 +934,9 @@ struct pg_stat_t {
   utime_t last_became_active;
 
   pg_stat_t()
-    : state(0),
+    : reported_seq(0),
+      reported_epoch(0),
+      state(0),
       created(0), last_epoch_clean(0),
       parent_split_bits(0),
       stats_invalid(false),
@@ -951,7 +948,7 @@ struct pg_stat_t {
     if (state & PG_STATE_CLEAN) {
       // we are clean as of this report, and should thus take the
       // reported epoch
-      return reported.epoch;
+      return reported_epoch;
     } else {
       return last_epoch_clean;
     }
index 07f999180a39d766b8f986ef31a6da1ad663dcaf..e5bde68b6311e1c34f33fd765872010c1cc7f593 100644 (file)
@@ -577,7 +577,7 @@ class OSDStub : public TestStub
     if (s.state & PG_STATE_CLEAN)
       s.last_clean = now;
     s.last_change = now;
-    s.reported.inc(1);
+    s.reported_seq++;
 
     pgs_changes.insert(pgid);
   }