osd: do not mark pg clean until active is durable

author Sage Weil <sage@inktank.com>

Sat, 5 May 2012 18:24:57 +0000 (11:24 -0700)

committer Sage Weil <sage@inktank.com>

Sat, 5 May 2012 20:08:09 +0000 (13:08 -0700)
author Sage Weil <sage@inktank.com>
Sat, 5 May 2012 18:24:57 +0000 (11:24 -0700)
committer Sage Weil <sage@inktank.com>
Sat, 5 May 2012 20:08:09 +0000 (13:08 -0700)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index fdceb67064a045d573c922b7a5b99e41b401caca..48d8db53e53e4c9a2fa4eb685ce7300d91c695df 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -5065,7 +5065,6 @@ void OSD::do_recovery(PG *pg)
         recovery_wq.lock();
         pg->recovery_item.remove_myself();      // sigh...
         recovery_wq.unlock();
-
        }
      }
  
@@ -5073,6 +5072,8 @@ void OSD::do_recovery(PG *pg)
      do_queries(query_map);
      do_infos(info_map);
  
+    pg->write_if_dirty(*t);
+
      if (!t->empty()) {
        int tr = store->queue_transaction(&pg->osr, t, new ObjectStore::C_DeleteTransaction(t), fin);
        assert(tr == 0);
@@ -5080,7 +5081,6 @@ void OSD::do_recovery(PG *pg)
        delete t;
        delete fin;
      }
-
      pg->unlock();
    }
    pg->put();
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index 5d23cdff70b6ace810943dfa8e08657b368ec072..ba97b968930d39d5c7fa54b845ee10fc8ea621dd 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1521,9 +1521,17 @@ void PG::all_activated_and_committed()
    assert(peer_activated.size() == acting.size());
  
    info.history.last_epoch_started = get_osdmap()->get_epoch();
-  share_pg_info();
-
    dirty_info = true;
+
+  // make sure CLEAN is marked if we've been clean in this interval
+  if (info.last_complete == info.last_update &&
+      !state_test(PG_STATE_BACKFILL) &&
+      !state_test(PG_STATE_RECOVERING)) {
+    mark_clean();
+  }
+
+  share_pg_info();
+  update_stats();
  }
  
  void PG::queue_snap_trim()
@@ -1555,30 +1563,38 @@ struct C_PG_FinishRecovery : public Context {
    }
  };
  
-void PG::finish_recovery(ObjectStore::Transaction& t, list<Context*>& tfin)
+void PG::mark_clean()
  {
-  dout(10) << "finish_recovery" << dendl;
-  state_clear(PG_STATE_BACKFILL);
-  state_clear(PG_STATE_RECOVERING);
-
    // only mark CLEAN if we have the desired number of replicas AND we
    // are not remapped.
    if (acting.size() == get_osdmap()->get_pg_size(info.pgid) &&
        up == acting)
      state_set(PG_STATE_CLEAN);
  
-  assert(info.last_complete == info.last_update);
-
    // NOTE: this is actually a bit premature: we haven't purged the
    // strays yet.
    info.history.last_epoch_clean = get_osdmap()->get_epoch();
-  share_pg_info();
-
-  clear_recovery_state();
  
    trim_past_intervals();
-  
-  write_info(t);
+
+  dirty_info = true;
+}
+
+void PG::finish_recovery(ObjectStore::Transaction& t, list<Context*>& tfin)
+{
+  dout(10) << "finish_recovery" << dendl;
+  assert(info.last_complete == info.last_update);
+
+  state_clear(PG_STATE_BACKFILL);
+  state_clear(PG_STATE_RECOVERING);
+
+  // only mark CLEAN if last_epoch_started is already stable.
+  if (info.history.last_epoch_started >= info.history.same_interval_since) {
+    mark_clean();
+    share_pg_info();
+  }
+
+  clear_recovery_state();
  
    /*
     * sync all this before purging strays.  but don't block!
diff --git a/src/osd/PG.h b/src/osd/PG.h

index 027e386dfdef92d3ab49da7e704d705fad517ad3..eec356775bef3e5c071c3483170effd8fde75493 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -638,6 +638,7 @@ public:
    
    bool needs_recovery() const;
  
+  void mark_clean();  ///< mark an active pg clean
    void generate_past_intervals();
    void trim_past_intervals();
    void build_prior(std::auto_ptr<PriorSet> &prior_set);
author	Sage Weil <sage@inktank.com>
	Sat, 5 May 2012 18:24:57 +0000 (11:24 -0700)
committer	Sage Weil <sage@inktank.com>
	Sat, 5 May 2012 20:08:09 +0000 (13:08 -0700)
src/osd/OSD.cc		patch | blob | history
src/osd/PG.cc		patch | blob | history
src/osd/PG.h		patch | blob | history