git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: fix prior_set_up_thru condition
author: Sage Weil <sage@newdream.net>
Tue, 4 Nov 2008 22:43:48 +0000 (14:43 -0800)
committer: Sage Weil <sage@newdream.net>
Tue, 4 Nov 2008 22:43:48 +0000 (14:43 -0800)
If an OSD's up_thru affects the membership of the prior_set, take note.
Then, if that OSD's up_thru changes later, we know to rebuild the prior_set.

src/osd/PG.cc
src/osd/PG.h

index 4d05462a58125901e393f1b3f7199c0845938377..0c5d7383785d2fca549a0a2aa8da2647a010d69d 100644 (file)
@@ -562,15 +562,17 @@ bool PG::prior_set_affected(OSDMap *osdmap)
       return true;
     }
   
-  // did primary's up_thru change?
-  if (acting.size() && prior_set_down.size() &&
-      prior_set_primary_up_thru != osdmap->get_up_thru(acting[0])) {
-    dout(10) << "prior_set_affected: primary osd" << acting[0]
-            << " up_thru " << prior_set_primary_up_thru
-            << " -> " << osdmap->get_up_thru(acting[0]) 
-            << dendl;
-    return true;
-  }
+  // did a significant osd's up_thru change?
+  for (map<int,epoch_t>::iterator p = prior_set_up_thru.begin();
+       p != prior_set_up_thru.end();
+       p++)
+    if (p->second != osdmap->get_up_thru(p->first)) {
+      dout(10) << "prior_set_affected: primary osd" << p->first
+              << " up_thru " << p->second
+              << " -> " << osdmap->get_up_thru(p->first) 
+              << dendl;
+      return true;
+    }
 
   return false;
 }
@@ -630,6 +632,7 @@ void PG::build_prior()
   // build prior set.
   prior_set.clear();
   prior_set_down.clear();
+  prior_set_up_thru.clear();
 
   // current nodes, of course.
   for (unsigned i=1; i<acting.size(); i++)
@@ -674,10 +677,9 @@ void PG::build_prior()
       continue;
     }
 
-    prior_set_primary_up_thru = lastmap->get_up_thru(acting[0]);
     bool maybe_went_rw = 
-      prior_set_primary_up_thru >= first_epoch &&
-      prior_set_primary_up_thru < first_epoch;
+      lastmap->get_up_thru(acting[0]) >= first_epoch &&
+      lastmap->get_up_from(acting[0]) < first_epoch;
 
     dout(10) << "build_prior epochs " << first_epoch << "-" << last_epoch << " " << acting
             << " - primary osd" << acting[0]
@@ -702,6 +704,7 @@ void PG::build_prior()
        dout(10) << "build_prior  prior osd" << acting[i] << " is down, must notify mon" << dendl;
        must_notify_mon = true;
 
+       // include osd in set anyway
        prior_set_down.insert(acting[i]);
 
        // fixme: how do we identify a "clean" shutdown anyway?
@@ -712,6 +715,11 @@ void PG::build_prior()
                     << (lastmap->get_up_thru(acting[i]) + 1) << dendl;
            some_down = true;
            prior_set.insert(acting[i]);
+
+           // take note that we care about the primary's (acting[0]'s)
+           // up_thru.  if it changes later, it will affect our prior_set,
+           // and we'll want to rebuild the prior set!
+           prior_set_up_thru[acting[0]] = lastmap->get_up_thru(acting[0]);
          }
        }
       }
@@ -746,7 +754,7 @@ void PG::clear_primary_state()
   have_master_log = false;
   prior_set.clear();
   prior_set_down.clear();
-  prior_set_primary_up_thru = 0;
+  prior_set_up_thru.clear();
   stray_set.clear();
   uptodate_set.clear();
   peer_info_requested.clear();
index f61a781d044d89ab5a92c6c565413a945f43a7df..3a1a73c5a559b47631f28a1e9bb9ee367cb18ae3 100644 (file)
@@ -562,8 +562,8 @@ protected:
   bool        have_master_log;
  protected:
   set<int>    prior_set;   // current+prior OSDs, as defined by info.history.last_epoch_started.
-  set<int>    prior_set_down;
-  epoch_t     prior_set_primary_up_thru;
+  set<int>    prior_set_down;          // down osds excluded from prior_set
+  map<int,epoch_t> prior_set_up_thru;  // osds whose up_thru we care about
   bool        must_notify_mon;
   set<int>    stray_set;   // non-acting osds that have PG data.
   set<int>    uptodate_set;  // current OSDs that are uptodate