osd: refactor recovery completion

author Sage Weil <sage.weil@dreamhost.com>

Fri, 17 Feb 2012 21:19:57 +0000 (13:19 -0800)

committer Sage Weil <sage.weil@dreamhost.com>

Fri, 17 Feb 2012 21:19:57 +0000 (13:19 -0800)
author Sage Weil <sage.weil@dreamhost.com>
Fri, 17 Feb 2012 21:19:57 +0000 (13:19 -0800)
committer Sage Weil <sage.weil@dreamhost.com>
Fri, 17 Feb 2012 21:19:57 +0000 (13:19 -0800)
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index 47024cca53fbf7fd03a8a376dbd9ef96c79d4ba1..16704c48eff4094e7ea13485ff994e671aca3d6d 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -636,21 +636,15 @@ ostream& PG::IndexedLog::print(ostream& out) const
  
  
  /******* PG ***********/
-bool PG::is_all_uptodate() const
+bool PG::needs_recovery() const
  {
    assert(is_primary());
  
-  bool uptodate = true;
-
-  if (up != acting) {
-    dout(10) << __func__ << ": the set of UP osds is not the same as the "
-            << "set of ACTING osds." << dendl;
-    uptodate = false;
-  }
+  bool ret = false;
  
    if (missing.num_missing()) {
-    dout(10) << __func__ << ": primary has " << missing.num_missing() << dendl;
-    uptodate = false;
+    dout(10) << __func__ << " primary has " << missing.num_missing() << dendl;
+    ret = true;
    }
  
    vector<int>::const_iterator end = acting.end();
@@ -661,24 +655,24 @@ bool PG::is_all_uptodate() const
      int peer = *a;
      map<int, pg_missing_t>::const_iterator pm = peer_missing.find(peer);
      if (pm == peer_missing.end()) {
-      dout(10) << __func__ << ": osd." << peer << " don't have missing set" << dendl;
-      uptodate = false;
+      dout(10) << __func__ << " osd." << peer << " don't have missing set" << dendl;
+      ret = true;
        continue;
      }
      if (pm->second.num_missing()) {
-      dout(10) << __func__ << ": osd." << peer << " has " << pm->second.num_missing() << " missing" << dendl;
-      uptodate = false;
+      dout(10) << __func__ << " osd." << peer << " has " << pm->second.num_missing() << " missing" << dendl;
+      ret = true;
      }
      map<int,pg_info_t>::const_iterator pi = peer_info.find(peer);
      if (pi->second.last_backfill != hobject_t::get_max()) {
-      dout(10) << __func__ << ": osd." << peer << " has last_backfill " << pi->second.last_backfill << dendl;
-      uptodate = false;
+      dout(10) << __func__ << " osd." << peer << " has last_backfill " << pi->second.last_backfill << dendl;
+      ret = true;
      }
    }
  
-  if (uptodate)
-    dout(10) << __func__ << ": everyone is uptodate" << dendl;
-  return uptodate;
+  if (!ret)
+    dout(10) << __func__ << " is recovered" << dendl;
+  return ret;
  }
  
  void PG::generate_past_intervals()
@@ -1369,9 +1363,9 @@ void PG::activate(ObjectStore::Transaction& t, list<Context*>& tfin,
        state_set(PG_STATE_DEGRADED);
  
      // all clean?
-    if (is_all_uptodate()) 
+    if (!needs_recovery()) {
        finish_recovery(t, tfin);
-    else {
+    } else {
        dout(10) << "activate not all replicas are uptodate, queueing recovery" << dendl;
        state_set(PG_STATE_RECOVERING);
        osd->queue_for_recovery(this);
@@ -1545,10 +1539,7 @@ void PG::finish_recovery(ObjectStore::Transaction& t, list<Context*>& tfin)
    state_clear(PG_STATE_BACKFILL);
    state_clear(PG_STATE_RECOVERING);
  
-  // only clear DEGRADED (or mark CLEAN) if we have enough (or the
-  // desired number of) replicas.
-  if (acting.size() >= get_osdmap()->get_pg_size(info.pgid))
-    state_clear(PG_STATE_DEGRADED);
+  // only mark CLEAN if we have the desired number of replicas.
    if (acting.size() == get_osdmap()->get_pg_size(info.pgid))
      state_set(PG_STATE_CLEAN);
  
@@ -3995,12 +3986,6 @@ boost::statechart::result PG::RecoveryState::Active::react(const ActMap&)
      pg->queue_snap_trim();
    }
  
-  if (pg->is_all_uptodate()) {
-    dout(10) << "Active: all up to date, going clean" << dendl;
-    pg->finish_recovery(*context< RecoveryMachine >().get_cur_transaction(),
-                       *context< RecoveryMachine >().get_context_list());
-  }
-
    return forward_event();
  }
  
@@ -4068,6 +4053,9 @@ boost::statechart::result PG::RecoveryState::Active::react(const RecoveryComplet
  
    int newest_update_osd;
  
+  pg->state_clear(PG_STATE_BACKFILL);
+  pg->state_clear(PG_STATE_RECOVERING);
+
    // adjust acting set?  (e.g. because backfill completed...)
    if (pg->acting != pg->up &&
        !pg->choose_acting(newest_update_osd)) {
@@ -4075,15 +4063,9 @@ boost::statechart::result PG::RecoveryState::Active::react(const RecoveryComplet
      return discard_event();
    }
  
-  if (pg->is_all_uptodate()) {
-    dout(10) << "recovery complete" << dendl;
-    pg->log.reset_recovery_pointers();
-    pg->finish_recovery(*context< RecoveryMachine >().get_cur_transaction(),
-                       *context< RecoveryMachine >().get_context_list());
-  } else {
-    dout(10) << "recovery not yet complete: some osds not up to date" << dendl;
-  }
-
+  assert(!pg->needs_recovery());
+  pg->finish_recovery(*context< RecoveryMachine >().get_cur_transaction(),
+                     *context< RecoveryMachine >().get_context_list());
    return discard_event();
  }
  
diff --git a/src/osd/PG.h b/src/osd/PG.h

index 6d187fea9a0ce68487ae3f1222e3c95bc4924e52..8e50c44093b56747a88da370bddc02761a579f29 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -674,7 +674,7 @@ public:
      return false;
    }
    
-  bool is_all_uptodate() const;
+  bool needs_recovery() const;
  
    void generate_past_intervals();
    void trim_past_intervals();
author	Sage Weil <sage.weil@dreamhost.com>
	Fri, 17 Feb 2012 21:19:57 +0000 (13:19 -0800)
committer	Sage Weil <sage.weil@dreamhost.com>
	Fri, 17 Feb 2012 21:19:57 +0000 (13:19 -0800)
src/osd/PG.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history