git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: remove last_epoch_started_any
author Sage Weil <sage@newdream.net>
Fri, 16 May 2008 21:13:24 +0000 (14:13 -0700)
committer Sage Weil <sage@newdream.net>
Fri, 16 May 2008 21:13:24 +0000 (14:13 -0700)
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h

index 096151bf820fe95fe25dc9dfac1f20cb6d0e1e8a..7874071e33e28de276b6b4fd02a61bdcb9024818 100644 (file)
@@ -524,8 +524,7 @@ PG * OSD::_create_lock_new_pg(pg_t pgid, vector<int>& acting, ObjectStore::Trans
   PG *pg = _create_lock_pg(pgid, t);
   pg->set_role(0);
   pg->acting.swap(acting);
-  pg->last_epoch_started_any = 
-    pg->info.history.epoch_created = 
+  pg->info.history.epoch_created = 
     pg->info.history.last_epoch_started = 
     pg->info.history.same_since = 
     pg->info.history.same_primary_since = 
@@ -2160,7 +2159,6 @@ void OSD::handle_pg_notify(MOSDPGNotify *m)
        pg->acting.swap(acting);
        pg->set_role(role);
        pg->info.history = history;
-       pg->last_epoch_started_any = history.last_epoch_started;  // _after_ clear_primary_state()
        pg->clear_primary_state();  // yep, notably, set hml=false
        pg->build_prior();      
        pg->write_log(t);
@@ -2188,10 +2186,11 @@ void OSD::handle_pg_notify(MOSDPGNotify *m)
 
     // ok!
     dout(10) << *pg << " osd" << from << " " << *it << dendl;
+    pg->info.history.merge(it->history);
 
     // stray?
     bool acting = pg->is_acting(from);
-    if (!acting && (*it).history.last_epoch_started > 0) {
+    if (!acting && (*it).last_update > eversion_t()) {     // FIXME is this right?
       dout(10) << *pg << " osd" << from << " has stray content: " << *it << dendl;
       pg->stray_set.insert(from);
       pg->state_clear(PG_STATE_CLEAN);
@@ -2215,8 +2214,7 @@ void OSD::handle_pg_notify(MOSDPGNotify *m)
       if (pg->is_all_uptodate()) 
        pg->finish_recovery();
     } else {
-      if (it->history.last_epoch_started > pg->last_epoch_started_any) 
-        pg->adjust_prior();
+      pg->adjust_prior();
       pg->peer(t, query_map, &info_map);
     }
 
@@ -2290,6 +2288,7 @@ void OSD::_process_pg_info(epoch_t epoch, int from,
   assert(pg);
 
   dout(10) << *pg << " got " << info << " " << log << " " << missing << dendl;
+  pg->info.history.merge(info.history);
 
   //m->log.print(cout);
 
@@ -2306,14 +2305,16 @@ void OSD::_process_pg_info(epoch_t epoch, int from,
     do_queries(query_map);
 
   } else {
-    // i am REPLICA
-    // merge log
-    pg->merge_log(log, missing, from);
-    pg->proc_missing(log, missing, from);
-    assert(pg->missing.num_lost() == 0);
-
-    // ok activate!
-    pg->activate(t, info_map);
+    if (!pg->info.dne()) {
+      // i am REPLICA
+      // merge log
+      pg->merge_log(log, missing, from);
+      pg->proc_missing(log, missing, from);
+      assert(pg->missing.num_lost() == 0);
+      
+      // ok activate!
+      pg->activate(t, info_map);
+    }
   }
 
   unsigned tr = store->apply_transaction(t);
@@ -2432,6 +2433,8 @@ void OSD::handle_pg_query(MOSDPGQuery *m)
       }
     }
 
+    pg->info.history.merge(it->second.history);
+
     // ok, process query!
     assert(!pg->acting.empty());
     assert(from == pg->acting[0]);
index fa1fd0501a5915b2703226953d909da06da430a3..29c9f6b82326b840a2ddf8bcdfb89d24c16c51f0 100644 (file)
@@ -570,7 +570,7 @@ void PG::build_prior()
   // for each acting set, we need to know same_since and last_epoch
   epoch_t first_epoch = info.history.same_since;
   epoch_t last_epoch = first_epoch - 1;
-  epoch_t stop = MAX(1, last_epoch_started_any);
+  epoch_t stop = MAX(1, info.history.last_epoch_started);
 
   dout(10) << "build_prior considering interval " << first_epoch << " down to " << stop << dendl;
   OSDMap *nextmap = new OSDMap;
@@ -638,7 +638,7 @@ void PG::adjust_prior()
 {
   assert(!prior_set.empty());
 
-  // raise last_epoch_started_any
+  // raise last_epoch_started
   epoch_t max = 0;
   for (map<int,Info>::iterator it = peer_info.begin();
        it != peer_info.end();
@@ -647,10 +647,10 @@ void PG::adjust_prior()
       max = it->second.history.last_epoch_started;
   }
 
-  dout(10) << "adjust_prior last_epoch_started_any " 
-           << last_epoch_started_any << " -> " << max << dendl;
-  assert(max > last_epoch_started_any);
-  last_epoch_started_any = max;
+  dout(10) << "adjust_prior last_epoch_started " 
+           << info.history.last_epoch_started << " -> " << max << dendl;
+  assert(max > info.history.last_epoch_started);
+  info.history.last_epoch_started = max;
 
   // rebuild prior set
   build_prior();
@@ -672,8 +672,6 @@ void PG::clear_primary_state()
   peer_missing.clear();
   
   stat_object_temp_rd.clear();
-
-  last_epoch_started_any = info.history.last_epoch_started;
 }
 
 void PG::peer(ObjectStore::Transaction& t, 
@@ -714,10 +712,10 @@ void PG::peer(ObjectStore::Transaction& t,
   // -- ok, we have all (prior_set) info.  (and maybe others.)
 
   // did we crash?
-  dout(10) << " last_epoch_started_any " << last_epoch_started_any << dendl;
-  if (last_epoch_started_any) {
+  dout(10) << " last_epoch_started " << info.history.last_epoch_started << dendl;
+  if (info.history.last_epoch_started) {
     OSDMap omap;
-    osd->get_map(last_epoch_started_any, omap);
+    osd->get_map(info.history.last_epoch_started, omap);
     
     // start with the last active set of replicas
     set<int> last_started;
@@ -728,7 +726,7 @@ void PG::peer(ObjectStore::Transaction& t,
       last_started.insert(acting[i]);
 
     // make sure at least one of them is still up
-    for (epoch_t e = last_epoch_started_any+1;
+    for (epoch_t e = info.history.last_epoch_started+1;
          e <= osd->osdmap->get_epoch();
          e++) {
       OSDMap omap;
@@ -752,16 +750,16 @@ void PG::peer(ObjectStore::Transaction& t,
     
     if (last_started.empty()) {
       if (cleanly_down) {
-       dout(10) << " cleanly stopped since epoch " << last_epoch_started_any << dendl;
+       dout(10) << " cleanly stopped since epoch " << info.history.last_epoch_started << dendl;
       } else {
-       dout(10) << " crashed since epoch " << last_epoch_started_any << dendl;
+       dout(10) << " crashed since epoch " << info.history.last_epoch_started << dendl;
        state_set(PG_STATE_CRASHED);
       }
     } else {
       dout(10) << " still active from last started: " << last_started << dendl;
     }
   } else if (osd->osdmap->get_epoch() > info.history.epoch_created) {  // FIXME hrm is htis right?
-    dout(10) << " crashed since epoch " << last_epoch_started_any << dendl;
+    dout(10) << " crashed since epoch " << info.history.last_epoch_started << dendl;
     state_set(PG_STATE_CRASHED);
   }    
 
@@ -957,7 +955,7 @@ void PG::activate(ObjectStore::Transaction& t,
     state_clear(PG_STATE_CRASHED);
     state_clear(PG_STATE_REPLAY);
   }
-  last_epoch_started_any = info.history.last_epoch_started = osd->osdmap->get_epoch();
+  info.history.last_epoch_started = osd->osdmap->get_epoch();
   
   if (role == 0) {    // primary state
     peers_complete_thru = eversion_t(0,0);  // we don't know (yet)!
index 38291f7188e7a4884472d86244d8e9ccb1709487..c6c97fe18e27bad513e88c16373aab0de5e4af4b 100644 (file)
@@ -67,8 +67,8 @@ public:
     bool       log_backlog;    // do we store a complete log?
 
     struct History {
-      epoch_t epoch_created;       // epoch in which it was created
-      epoch_t last_epoch_started;  // last epoch started.
+      epoch_t epoch_created;       // epoch in which PG was created
+      epoch_t last_epoch_started;  // lower bound on last epoch started (anywhere, not necessarily locally)
 
       epoch_t same_since;          // same acting set since
       epoch_t same_primary_since;  // same primary at least back through this epoch.
@@ -77,6 +77,14 @@ public:
        epoch_created(0),
        last_epoch_started(0),
        same_since(0), same_primary_since(0), same_acker_since(0) {}
+
+      void merge(const History &other) {  // fold another History into this one, field by field
+       if (epoch_created < other.epoch_created)  // keep the larger of the two epoch_created values
+         epoch_created = other.epoch_created;
+       if (last_epoch_started < other.last_epoch_started)  // keep the larger of the two last_epoch_started values
+         last_epoch_started = other.last_epoch_started;
+      }  // same_since, same_primary_since and same_acker_since are not touched by merge()
+
       void encode(bufferlist &bl) const {
        ::encode(epoch_created, bl);
        ::encode(last_epoch_started, bl);
@@ -498,14 +506,13 @@ protected:
   // primary state
  public:
   vector<int> acting;
-  epoch_t     last_epoch_started_any;
   eversion_t  last_complete_commit;
 
   // [primary only] content recovery state
   eversion_t  peers_complete_thru;
   bool        have_master_log;
  protected:
-  set<int>    prior_set;   // current+prior OSDs, as defined by last_epoch_started_any.
+  set<int>    prior_set;   // current+prior OSDs, as defined by info.history.last_epoch_started.
   bool        must_notify_mon;
   set<int>    stray_set;   // non-acting osds that have PG data.
   set<int>    uptodate_set;  // current OSDs that are uptodate
@@ -559,7 +566,7 @@ public:
   bool is_all_uptodate() const { return uptodate_set.size() == acting.size(); }
 
   void build_prior();
-  void adjust_prior();  // based on new peer_info.last_epoch_started_any
+  void adjust_prior();  // based on new peer_info.last_epoch_started
 
   bool adjust_peers_complete_thru() {
     eversion_t t = info.last_complete;
@@ -609,7 +616,6 @@ public:
     info(p),
     role(0),
     state(0),
-    last_epoch_started_any(0),
     have_master_log(true),
     must_notify_mon(false),
     stat_num_bytes(0), stat_num_blocks(0)