git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/,mon/: add (up|acting)_primary to pg_stat_t
author Samuel Just <sam.just@inktank.com>
Wed, 19 Feb 2014 20:16:46 +0000 (12:16 -0800)
committer Samuel Just <sam.just@inktank.com>
Wed, 19 Feb 2014 22:14:18 +0000 (14:14 -0800)
We use pg_stat_t information to determine pg create targeting.

Fixes: #7481
Signed-off-by: Samuel Just <sam.just@inktank.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
src/mon/PGMap.cc
src/mon/PGMonitor.cc
src/osd/PG.cc
src/osd/osd_types.cc
src/osd/osd_types.h

index 56376d8b6ce3647770761fd213f36c6e2a65ad38..ad2aacbf6db096a0df36e4f49a852093525728df 100644 (file)
@@ -622,7 +622,7 @@ void PGMap::dump_osd_stats(Formatter *f) const
 void PGMap::dump_pg_stats_plain(ostream& ss,
                                const ceph::unordered_map<pg_t, pg_stat_t>& pg_stats) const
 {
-  ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl;
+  ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tup_primary\tacting\tacting_primary\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl;
   for (ceph::unordered_map<pg_t, pg_stat_t>::const_iterator i = pg_stats.begin();
        i != pg_stats.end(); ++i) {
     const pg_stat_t &st(i->second);
@@ -640,7 +640,9 @@ void PGMap::dump_pg_stats_plain(ostream& ss,
        << "\t" << st.version
        << "\t" << st.reported_epoch << ":" << st.reported_seq
        << "\t" << st.up
+       << "\t" << st.up_primary
        << "\t" << st.acting
+       << "\t" << st.acting_primary
        << "\t" << st.last_scrub << "\t" << st.last_scrub_stamp
        << "\t" << st.last_deep_scrub << "\t" << st.last_deep_scrub_stamp
        << std::endl;
index a858a550610930905aff4eebb670ee2ee569f610..43683b67fc97d192c8bba79080a97a2398a95e71 100644 (file)
@@ -1071,15 +1071,25 @@ void PGMonitor::map_pg_creates()
     pg_stat_t& s = pg_map.pg_stat[pgid];
     if (s.parent_split_bits)
       on = s.parent;
-    vector<int> acting;
-    int nrep = osdmap->pg_to_acting_osds(on, acting);
 
-    if (s.acting.size()) {
-      pg_map.creating_pgs_by_osd[s.acting[0]].erase(pgid);
-      if (pg_map.creating_pgs_by_osd[s.acting[0]].size() == 0)
-        pg_map.creating_pgs_by_osd.erase(s.acting[0]);
+    vector<int> up, acting;
+    int up_primary, acting_primary;
+    osdmap->pg_to_up_acting_osds(
+      on,
+      &up,
+      &up_primary,
+      &acting,
+      &acting_primary);
+
+    if (s.acting_primary != -1) {
+      pg_map.creating_pgs_by_osd[s.acting_primary].erase(pgid);
+      if (pg_map.creating_pgs_by_osd[s.acting_primary].size() == 0)
+        pg_map.creating_pgs_by_osd.erase(s.acting_primary);
     }
+    s.up = up;
+    s.up_primary = up_primary;
     s.acting = acting;
+    s.acting_primary = acting_primary;
 
     // don't send creates for localized pgs
     if (pgid.preferred() >= 0)
@@ -1089,8 +1099,8 @@ void PGMonitor::map_pg_creates()
     if (s.parent_split_bits)
       continue;
 
-    if (nrep) {
-      pg_map.creating_pgs_by_osd[acting[0]].insert(pgid);
+    if (acting_primary != -1) {
+      pg_map.creating_pgs_by_osd[acting_primary].insert(pgid);
     } else {
       dout(20) << "map_pg_creates  " << pgid << " -> no osds in epoch "
               << mon->osdmon()->osdmap.get_epoch() << ", skipping" << dendl;
@@ -1159,8 +1169,8 @@ bool PGMonitor::check_down_pgs()
        p != pg_map.pg_stat.end();
        ++p) {
     if ((p->second.state & PG_STATE_STALE) == 0 &&
-       p->second.acting.size() &&
-       osdmap->is_down(p->second.acting[0])) {
+       p->second.acting_primary != -1 &&
+       osdmap->is_down(p->second.acting_primary)) {
       dout(10) << " marking pg " << p->first << " stale with acting " << p->second.acting << dendl;
 
       map<pg_t,pg_stat_t>::iterator q = pending_inc.pg_stat_updates.find(p->first);
@@ -1529,12 +1539,12 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
       r = -ENOENT;
       goto reply;
     }
-    if (!pg_map.pg_stat[pgid].acting.size()) {
+    if (pg_map.pg_stat[pgid].acting_primary == -1) {  // -1 is the "no primary" sentinel
       ss << "pg " << pgid << " has no primary osd";
       r = -EAGAIN;
       goto reply;
     }
-    int osd = pg_map.pg_stat[pgid].acting[0];
+    int osd = pg_map.pg_stat[pgid].acting_primary;
     if (!mon->osdmon()->osdmap.is_up(osd)) {
       ss << "pg " << pgid << " primary osd." << osd << " not up";
       r = -EAGAIN;
index bfb48feb880e80f4da47ce73359c1b1a105b42cc..5812b8bbb7bf75cd640a61dbc210370c499f457a 100644 (file)
@@ -2341,7 +2341,9 @@ void PG::init(
   past_intervals.swap(pi);
 
   info.stats.up = up;
+  info.stats.up_primary = new_up_primary;
   info.stats.acting = acting;
+  info.stats.acting_primary = new_acting_primary;
   info.stats.mapping_epoch = info.history.same_interval_since;
 
   if (backfill) {
@@ -4638,9 +4640,13 @@ void PG::start_peering_interval(
     new_acting_primary);
 
   if (info.stats.up != up ||
-      info.stats.acting != acting) {
+      info.stats.acting != acting ||
+      info.stats.up_primary != new_up_primary ||
+      info.stats.acting_primary != new_acting_primary) {
     info.stats.up = up;
+    info.stats.up_primary = new_up_primary;
     info.stats.acting = acting;
+    info.stats.acting_primary = new_acting_primary;
     info.stats.mapping_epoch = info.history.same_interval_since;
   }
 
index d8af4c1097f26d5a78c2318726fbd507cd82dccb..f0a82a8a56ee254ffd067cba5c07f0e6b9d7ba8a 100644 (file)
@@ -1541,6 +1541,8 @@ void pg_stat_t::dump(Formatter *f) const
   f->open_array_section("acting");
   for (vector<int>::const_iterator p = acting.begin(); p != acting.end(); ++p)
     f->dump_int("osd", *p);
   f->close_section();
+  f->dump_int("up_primary", up_primary);  // emitted after close_section(): not an element of "acting"
+  f->dump_int("acting_primary", acting_primary);
 }
 
@@ -1554,12 +1556,14 @@ void pg_stat_t::dump_brief(Formatter *f) const
   f->open_array_section("acting");
   for (vector<int>::const_iterator p = acting.begin(); p != acting.end(); ++p)
     f->dump_int("osd", *p);
   f->close_section();
+  f->dump_int("up_primary", up_primary);  // emitted after close_section(): not an element of "acting"
+  f->dump_int("acting_primary", acting_primary);
 }
 
 void pg_stat_t::encode(bufferlist &bl) const
 {
-  ENCODE_START(14, 8, bl);
+  ENCODE_START(15, 8, bl);
   ::encode(version, bl);
   ::encode(reported_seq, bl);
   ::encode(reported_epoch, bl);
@@ -1589,12 +1593,14 @@ void pg_stat_t::encode(bufferlist &bl) const
   ::encode(last_clean_scrub_stamp, bl);
   ::encode(last_became_active, bl);
   ::encode(dirty_stats_invalid, bl);
+  ::encode(up_primary, bl);
+  ::encode(acting_primary, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_stat_t::decode(bufferlist::iterator &bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(14, 8, 8, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(15, 8, 8, bl);
   ::decode(version, bl);
   ::decode(reported_seq, bl);
   ::decode(reported_epoch, bl);
@@ -1681,6 +1687,13 @@ void pg_stat_t::decode(bufferlist::iterator &bl)
     // encoder may not have supported num_objects_dirty accounting.
     dirty_stats_invalid = true;
   }
+  if (struct_v >= 15) {
+    ::decode(up_primary, bl);
+    ::decode(acting_primary, bl);
+  } else {
+    up_primary = up.size() ? up[0] : -1;
+    acting_primary = acting.size() ? acting[0] : -1;
+  }
   DECODE_FINISH(bl);
 }
 
@@ -1716,7 +1729,15 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
   a.log_size = 99;
   a.ondisk_log_size = 88;
   a.up.push_back(123);
+  a.up_primary = 123;
   a.acting.push_back(456);
+  a.acting_primary = 456;
+  o.push_back(new pg_stat_t(a));
+
+  a.up.push_back(124);
+  a.up_primary = 124;
+  a.acting.push_back(124);
+  a.acting_primary = 124;
   o.push_back(new pg_stat_t(a));
 }
 
index 732439cb941bbba711a15665cacb84038c27263a..f74723bbbaac137748974116485721bf24d77136 100644 (file)
@@ -1287,6 +1287,10 @@ struct pg_stat_t {
   /// maintained starting from pool creation)
   bool dirty_stats_invalid;
 
+  /// up, acting primaries
+  int up_primary;
+  int acting_primary;
+
   pg_stat_t()
     : reported_seq(0),
       reported_epoch(0),
@@ -1296,7 +1300,9 @@ struct pg_stat_t {
       stats_invalid(false),
       log_size(0), ondisk_log_size(0),
       mapping_epoch(0),
-      dirty_stats_invalid(false)
+      dirty_stats_invalid(false),
+      up_primary(-1),
+      acting_primary(-1)
   { }
 
   epoch_t get_effective_last_epoch_clean() const {