]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/PGMonitor: reliably mark PGs stale 8089/head
authorSage Weil <sage@redhat.com>
Mon, 14 Mar 2016 15:17:02 +0000 (11:17 -0400)
committerSage Weil <sage@redhat.com>
Mon, 14 Mar 2016 15:17:02 +0000 (11:17 -0400)
In beb99d708a5051405a32f46fc10cef473bfddce3 we partially addressed
the case where an acting_primary update raced with an attempt to
mark the pg stale.  However, if the acting_primary changed, we
didn't check whether the new acting_primary is also down, in which
case we should still mark the pg stale.

Also, printing out the acting_primary aids debugging.

Signed-off-by: Sage Weil <sage@redhat.com>
src/mon/PGMonitor.cc
src/mon/PGMonitor.h

index 5cd2edb58c260f1b3daeb44961205560ae535d3c..91449bcb49858e82ed0c1611767cf32b04635848 100644 (file)
@@ -1281,9 +1281,11 @@ epoch_t PGMonitor::send_pg_creates(int osd, Connection *con, epoch_t next)
   return last + 1;
 }
 
-void PGMonitor::_mark_pg_stale(pg_t pgid, const pg_stat_t& cur_stat)
+void PGMonitor::_try_mark_pg_stale(
+  OSDMap *osdmap,
+  pg_t pgid,
+  const pg_stat_t& cur_stat)
 {
-  dout(10) << " marking pg " << pgid << " stale" << dendl;
   map<pg_t,pg_stat_t>::iterator q = pending_inc.pg_stat_updates.find(pgid);
   pg_stat_t *stat;
   if (q == pending_inc.pg_stat_updates.end()) {
@@ -1292,7 +1294,12 @@ void PGMonitor::_mark_pg_stale(pg_t pgid, const pg_stat_t& cur_stat)
   } else {
     stat = &q->second;
   }
-  if (stat->acting_primary == cur_stat.acting_primary) {
+  if ((stat->acting_primary == cur_stat.acting_primary) ||
+      ((stat->state & PG_STATE_STALE) == 0 &&
+       stat->acting_primary != -1 &&
+       osdmap->is_down(stat->acting_primary))) {
+    dout(10) << " marking pg " << pgid << " stale (acting_primary "
+            << stat->acting_primary << ")" << dendl;
     stat->state |= PG_STATE_STALE;  
     stat->last_unstale = ceph_clock_now(g_ceph_context);
   }
@@ -1316,7 +1323,7 @@ bool PGMonitor::check_down_pgs()
       if ((p.second.state & PG_STATE_STALE) == 0 &&
           p.second.acting_primary != -1 &&
           osdmap->is_down(p.second.acting_primary)) {
-       _mark_pg_stale(p.first, p.second);
+       _try_mark_pg_stale(osdmap, p.first, p.second);
        ret = true;
       }
     }
@@ -1327,7 +1334,7 @@ bool PGMonitor::check_down_pgs()
          const pg_stat_t &stat = pg_map.pg_stat[pgid];
          if ((stat.state & PG_STATE_STALE) == 0 &&
              stat.acting_primary != -1) {
-           _mark_pg_stale(pgid, stat);
+           _try_mark_pg_stale(osdmap, pgid, stat);
            ret = true;
          }
        }
index c5af197fe591f5bafb7f274f476b7745d7cf65af..d3351f3e7ca389aed32d01e6c048bf6fd7d726bf 100644 (file)
@@ -141,7 +141,7 @@ private:
    * @return true if we updated pending_inc (and should propose)
    */
   bool check_down_pgs();
-  void _mark_pg_stale(pg_t pgid, const pg_stat_t& cur_stat);
+  void _try_mark_pg_stale(OSDMap *osdmap, pg_t pgid, const pg_stat_t& cur_stat);
 
 
   /**