]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: reorder PG recovery_state initialization
authorSage Weil <sage.weil@dreamhost.com>
Fri, 3 Feb 2012 17:27:47 +0000 (09:27 -0800)
committerSage Weil <sage.weil@dreamhost.com>
Fri, 3 Feb 2012 17:27:47 +0000 (09:27 -0800)
The state machine state constructors print stuff to the logs, and the
PG::gen_prefix() includes all kinds of PG fields in that.  Move the
recovery_state member to the end of the class so that all previous fields
are initialized when that happens.  This makes valgrind shut up.

Some more deliberate tidying of the PG members and methods would also
be nice...

Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
src/osd/PG.h

index 108af9f462258a64173839f4cae8d372830b74af..a19ef075b1b1324608c85c58d403cd35636f6557 100644 (file)
@@ -538,6 +538,271 @@ public:
   };
 
 
+protected:
+
+  /*
+   * peer_info    -- projected (updates _before_ replicas ack)
+   * peer_missing -- committed (updates _after_ replicas ack)
+   */
+  
+  bool        need_up_thru;
+  set<int>    stray_set;   // non-acting osds that have PG data.
+  eversion_t  oldest_update; // acting: lowest (valid) last_update in active set
+  map<int,pg_info_t>    peer_info;   // info from peers (stray or prior)
+  map<int,pg_missing_t> peer_missing;
+  set<int>             peer_log_requested;  // logs i've requested (and start stamps)
+  set<int>             peer_missing_requested;
+  set<int>             stray_purged;  // i deleted these strays; ignore racing PGInfo from them
+  set<int>             peer_activated;
+
+  // primary-only, recovery-only state
+  set<int>             might_have_unfound;  // These osds might have objects on them
+                                           // which are unfound on the primary
+
+  epoch_t last_peering_reset;
+
+  /**
+   * BackfillInterval
+   *
+   * Represents the objects in a range [begin, end)
+   *
+   * Possible states:
+   * 1) begin == end == hobject_t() indicates the the interval is unpopulated
+   * 2) Else, objects contains all objects in [begin, end)
+   */
+  struct BackfillInterval {
+    // info about a backfill interval on a peer
+    map<hobject_t,eversion_t> objects;
+    hobject_t begin;
+    hobject_t end;
+    
+    /// clear content
+    void clear() {
+      objects.clear();
+      begin = end = hobject_t();
+    }
+
+    void reset(hobject_t start) {
+      clear();
+      begin = end = start;
+    }
+
+    /// true if there are no objects in this interval
+    bool empty() {
+      return objects.empty();
+    }
+
+    /// true if interval extends to the end of the range
+    bool extends_to_end() {
+      return end == hobject_t::get_max();
+    }
+
+    /// Adjusts begin to the first object
+    void trim() {
+      if (objects.size())
+       begin = objects.begin()->first;
+      else
+       begin = end;
+    }
+
+    /// drop first entry, and adjust @begin accordingly
+    void pop_front() {
+      assert(!objects.empty());
+      objects.erase(objects.begin());
+      if (objects.empty())
+       begin = end;
+      else
+       begin = objects.begin()->first;
+    }
+  };
+  
+  BackfillInterval backfill_info;
+  BackfillInterval peer_backfill_info;
+  int backfill_target;
+
+  friend class OSD;
+
+public:
+  int get_backfill_target() const {
+    return backfill_target;
+  }
+
+protected:
+
+
+  // pg waiters
+  list<OpRequest*>            waiting_for_active;
+  list<OpRequest*>            waiting_for_all_missing;
+  map<hobject_t, list<OpRequest*> > waiting_for_missing_object,
+                                        waiting_for_degraded_object;
+  map<eversion_t,list<OpRequest*> > waiting_for_ondisk;
+  map<eversion_t,OpRequest*>   replay_queue;
+
+  void requeue_object_waiters(map<hobject_t, list<OpRequest*> >& m);
+
+  // stats
+  Mutex pg_stats_lock;
+  bool pg_stats_valid;
+  pg_stat_t pg_stats_stable;
+
+  // for ordering writes
+  ObjectStore::Sequencer osr;
+
+  void update_stats();
+  void clear_stats();
+
+public:
+  void clear_primary_state();
+
+ public:
+  bool is_acting(int osd) const { 
+    for (unsigned i=0; i<acting.size(); i++)
+      if (acting[i] == osd) return true;
+    return false;
+  }
+  bool is_up(int osd) const { 
+    for (unsigned i=0; i<up.size(); i++)
+      if (up[i] == osd) return true;
+    return false;
+  }
+  
+  bool is_all_uptodate() const;
+
+  void generate_past_intervals();
+  void trim_past_intervals();
+  void build_prior(std::auto_ptr<PriorSet> &prior_set);
+
+  void remove_down_peer_info(const OSDMapRef osdmap);
+
+  bool adjust_need_up_thru(const OSDMapRef osdmap);
+
+  bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
+  virtual void mark_all_unfound_lost(int how) = 0;
+
+  bool calc_min_last_complete_ondisk() {
+    eversion_t min = last_complete_ondisk;
+    for (unsigned i=1; i<acting.size(); i++) {
+      if (peer_last_complete_ondisk.count(acting[i]) == 0)
+       return false;   // we don't have complete info
+      eversion_t a = peer_last_complete_ondisk[acting[i]];
+      if (a < min)
+       min = a;
+    }
+    if (min == min_last_complete_ondisk)
+      return false;
+    min_last_complete_ondisk = min;
+    return true;
+  }
+
+  virtual void calc_trim_to() = 0;
+
+  void proc_replica_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
+                       pg_missing_t& omissing, int from);
+  void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
+                      pg_missing_t& omissing, int from);
+  bool proc_replica_info(int from, pg_info_t &info);
+  bool merge_old_entry(ObjectStore::Transaction& t, pg_log_entry_t& oe);
+  void merge_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, int from);
+  void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead);
+  bool search_for_missing(const pg_info_t &oinfo, const pg_missing_t *omissing,
+                         int fromosd);
+
+  void check_for_lost_objects();
+  void forget_lost_objects();
+
+  void discover_all_missing(std::map< int, map<pg_t,pg_query_t> > &query_map);
+  
+  void trim_write_ahead();
+
+  map<int, pg_info_t>::const_iterator find_best_info(const map<int, pg_info_t> &infos) const;
+  bool calc_acting(int& newest_update_osd, vector<int>& want) const;
+  bool choose_acting(int& newest_update_osd);
+  void build_might_have_unfound();
+  void replay_queued_ops();
+  void activate(ObjectStore::Transaction& t, list<Context*>& tfin,
+               map< int, map<pg_t,pg_query_t> >& query_map,
+               map<int, MOSDPGInfo*> *activator_map=0);
+  void _activate_committed(epoch_t e);
+  void all_activated_and_committed();
+
+  void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info);
+
+  bool have_unfound() const { 
+    return missing.num_missing() > missing_loc.size();
+  }
+  int get_num_unfound() const {
+    return missing.num_missing() - missing_loc.size();
+  }
+
+  virtual void clean_up_local(ObjectStore::Transaction& t) = 0;
+
+  virtual int start_recovery_ops(int max, RecoveryCtx *prctx) = 0;
+
+  void purge_strays();
+
+  Context *finish_sync_event;
+
+  void finish_recovery(ObjectStore::Transaction& t, list<Context*>& tfin);
+  void _finish_recovery(Context *c);
+  void cancel_recovery();
+  void clear_recovery_state();
+  virtual void _clear_recovery_state() = 0;
+  void defer_recovery();
+  virtual void check_recovery_op_pulls(const OSDMapRef newmap) = 0;
+  void start_recovery_op(const hobject_t& soid);
+  void finish_recovery_op(const hobject_t& soid, bool dequeue=false);
+
+  loff_t get_log_write_pos() {
+    return 0;
+  }
+
+  friend class C_OSD_RepModify_Commit;
+
+
+  // -- scrub --
+  set<int> scrub_reserved_peers;
+  map<int,ScrubMap> scrub_received_maps;
+  bool finalizing_scrub; 
+  bool scrub_reserved, scrub_reserve_failed;
+  int scrub_waiting_on;
+  epoch_t scrub_epoch_start;
+  ScrubMap primary_scrubmap;
+  MOSDRepScrub *active_rep_scrub;
+
+  void repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer, int ok_peer);
+  bool _compare_scrub_objects(ScrubMap::object &auth,
+                             ScrubMap::object &candidate,
+                             ostream &errorstream);
+  void _compare_scrubmaps(const map<int,ScrubMap*> &maps,  
+                         map<hobject_t, set<int> > &missing,
+                         map<hobject_t, set<int> > &inconsistent,
+                         map<hobject_t, int> &authoritative,
+                         ostream &errorstream);
+  void scrub();
+  void scrub_finalize();
+  void scrub_clear_state();
+  bool scrub_gather_replica_maps();
+  void _scan_list(ScrubMap &map, vector<hobject_t> &ls);
+  void _request_scrub_map(int replica, eversion_t version);
+  void build_scrub_map(ScrubMap &map);
+  void build_inc_scrub_map(ScrubMap &map, eversion_t v);
+  virtual int _scrub(ScrubMap &map, int& errors, int& fixed) { return 0; }
+  void clear_scrub_reserved();
+  void scrub_reserve_replicas();
+  void scrub_unreserve_replicas();
+  bool scrub_all_replicas_reserved() const;
+  bool sched_scrub();
+
+  void replica_scrub(class MOSDRepScrub *op);
+  void sub_op_scrub_map(OpRequest *op);
+  void sub_op_scrub_reserve(OpRequest *op);
+  void sub_op_scrub_reserve_reply(OpRequest *op);
+  void sub_op_scrub_unreserve(OpRequest *op);
+  void sub_op_scrub_stop(OpRequest *op);
+
+
+  // -- recovery state --
+
   /* Encapsulates PG recovery process */
   class RecoveryState {
     void start_handle(RecoveryCtx *new_ctx) {
@@ -697,528 +962,267 @@ public:
        boost::statechart::custom_reaction< MLogRec >,
        boost::statechart::transition< boost::statechart::event_base, Crashed >
        > reactions;
-
-      boost::statechart::result react(const MNotifyRec&);
-      boost::statechart::result react(const MInfoRec&);
-      boost::statechart::result react(const MLogRec&);
-    };
-
-    struct Reset : boost::statechart::state< Reset, RecoveryMachine >, NamedState {
-      Reset(my_context ctx);
-      void exit();
-
-      typedef boost::mpl::list <
-       boost::statechart::custom_reaction< AdvMap >,
-       boost::statechart::custom_reaction< ActMap >,
-       boost::statechart::transition< boost::statechart::event_base, Crashed >
-       > reactions;
-      boost::statechart::result react(const AdvMap&);
-      boost::statechart::result react(const ActMap&);
-    };
-
-    struct Start;
-
-    struct Started : boost::statechart::state< Started, RecoveryMachine, Start >, NamedState {
-      Started(my_context ctx);
-      void exit();
-
-      typedef boost::mpl::list <
-       boost::statechart::custom_reaction< AdvMap >,
-       boost::statechart::transition< boost::statechart::event_base, Crashed >
-       > reactions;
-      boost::statechart::result react(const AdvMap&);
-    };
-
-    struct MakePrimary : boost::statechart::event< MakePrimary > {
-      MakePrimary() : boost::statechart::event< MakePrimary >() {}
-    };
-    struct MakeStray : boost::statechart::event< MakeStray > {
-      MakeStray() : boost::statechart::event< MakeStray >() {}
-    };
-    struct Primary;
-    struct Stray;
-
-    struct Start : boost::statechart::state< Start, Started >, NamedState {
-      Start(my_context ctx);
-      void exit();
-
-      typedef boost::mpl::list <
-       boost::statechart::transition< MakePrimary, Primary >,
-       boost::statechart::transition< MakeStray, Stray >
-       > reactions;
-    };
-
-    struct Peering;
-    struct WaitActingChange;
-    struct NeedActingChange : boost::statechart::event< NeedActingChange > {
-      NeedActingChange() : boost::statechart::event< NeedActingChange >() {}
-    };
-    struct Incomplete;
-    struct IsIncomplete : boost::statechart::event< IsIncomplete > {
-      IsIncomplete() : boost::statechart::event< IsIncomplete >() {}
-    };
-
-    struct Primary : boost::statechart::state< Primary, Started, Peering >, NamedState {
-      Primary(my_context ctx);
-      void exit();
-
-      typedef boost::mpl::list <
-       boost::statechart::custom_reaction< ActMap >,
-       boost::statechart::custom_reaction< MNotifyRec >,
-       boost::statechart::custom_reaction< AdvMap >,
-       boost::statechart::transition< NeedActingChange, WaitActingChange >,
-       boost::statechart::transition< IsIncomplete, Incomplete >
-       > reactions;
-      boost::statechart::result react(const ActMap&);
-      boost::statechart::result react(const AdvMap&);
-      boost::statechart::result react(const MNotifyRec&);
-    };
-
-    struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>,
-                             NamedState {
-      typedef boost::mpl::list <
-       boost::statechart::custom_reaction< AdvMap >,
-       boost::statechart::custom_reaction< MLogRec >,
-       boost::statechart::custom_reaction< MInfoRec >,
-       boost::statechart::custom_reaction< MNotifyRec >
-       > reactions;
-      WaitActingChange(my_context ctx);
-      boost::statechart::result react(const AdvMap&);
-      boost::statechart::result react(const MLogRec&);
-      boost::statechart::result react(const MInfoRec&);
-      boost::statechart::result react(const MNotifyRec&);
-      void exit();
-    };
-
-    struct Incomplete : boost::statechart::state< Incomplete, Primary>,
-                       NamedState {
-      Incomplete(my_context ctx);
-      void exit();
-    };
-    
-    struct GetInfo;
-    struct Active;
-
-    struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState {
-      std::auto_ptr< PriorSet > prior_set;
-
-      Peering(my_context ctx);
-      void exit();
-
-      typedef boost::mpl::list <
-       boost::statechart::transition< Activate, Active >,
-       boost::statechart::custom_reaction< AdvMap >
-       > reactions;
-      boost::statechart::result react(const AdvMap &advmap);
-    };
-
-    struct Active : boost::statechart::state< Active, Primary >, NamedState {
-      Active(my_context ctx);
-      void exit();
-
-      typedef boost::mpl::list <
-       boost::statechart::custom_reaction< ActMap >,
-       boost::statechart::custom_reaction< AdvMap >,
-       boost::statechart::custom_reaction< MInfoRec >,
-       boost::statechart::custom_reaction< MNotifyRec >,
-       boost::statechart::custom_reaction< MLogRec >,
-       boost::statechart::custom_reaction< RecoveryComplete >
-       > reactions;
-      boost::statechart::result react(const ActMap&);
-      boost::statechart::result react(const AdvMap&);
-      boost::statechart::result react(const MInfoRec& infoevt);
-      boost::statechart::result react(const MNotifyRec& notevt);
-      boost::statechart::result react(const MLogRec& logevt);
-      boost::statechart::result react(const RecoveryComplete&);
+
+      boost::statechart::result react(const MNotifyRec&);
+      boost::statechart::result react(const MInfoRec&);
+      boost::statechart::result react(const MLogRec&);
     };
 
-    struct ReplicaActive : boost::statechart::state< ReplicaActive, Started >, NamedState {
-      ReplicaActive(my_context ctx);
+    struct Reset : boost::statechart::state< Reset, RecoveryMachine >, NamedState {
+      Reset(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
+       boost::statechart::custom_reaction< AdvMap >,
        boost::statechart::custom_reaction< ActMap >,
-       boost::statechart::custom_reaction< MQuery >,
-       boost::statechart::custom_reaction< MInfoRec >,
-       boost::statechart::custom_reaction< MLogRec >
+       boost::statechart::transition< boost::statechart::event_base, Crashed >
        > reactions;
-      boost::statechart::result react(const MInfoRec& infoevt);
-      boost::statechart::result react(const MLogRec& logevt);
+      boost::statechart::result react(const AdvMap&);
       boost::statechart::result react(const ActMap&);
-      boost::statechart::result react(const MQuery&);
     };
 
-    struct Stray : boost::statechart::state< Stray, Started >, NamedState {
-      map<int, pair<pg_query_t, epoch_t> > pending_queries;
+    struct Start;
 
-      Stray(my_context ctx);
+    struct Started : boost::statechart::state< Started, RecoveryMachine, Start >, NamedState {
+      Started(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
-       boost::statechart::custom_reaction< MQuery >,
-       boost::statechart::custom_reaction< MLogRec >,
-       boost::statechart::custom_reaction< MInfoRec >,
-       boost::statechart::custom_reaction< ActMap >,
-       boost::statechart::transition< Activate, ReplicaActive >
+       boost::statechart::custom_reaction< AdvMap >,
+       boost::statechart::transition< boost::statechart::event_base, Crashed >
        > reactions;
-      boost::statechart::result react(const MQuery& query);
-      boost::statechart::result react(const MLogRec& logevt);
-      boost::statechart::result react(const MInfoRec& infoevt);
-      boost::statechart::result react(const ActMap&);
+      boost::statechart::result react(const AdvMap&);
     };
 
-    struct GetLog;
-
-    struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
-      set<int> peer_info_requested;
+    struct MakePrimary : boost::statechart::event< MakePrimary > {
+      MakePrimary() : boost::statechart::event< MakePrimary >() {}
+    };
+    struct MakeStray : boost::statechart::event< MakeStray > {
+      MakeStray() : boost::statechart::event< MakeStray >() {}
+    };
+    struct Primary;
+    struct Stray;
 
-      GetInfo(my_context ctx);
+    struct Start : boost::statechart::state< Start, Started >, NamedState {
+      Start(my_context ctx);
       void exit();
-      void get_infos();
 
       typedef boost::mpl::list <
-       boost::statechart::transition< GotInfo, GetLog >,
-       boost::statechart::custom_reaction< MNotifyRec >
+       boost::statechart::transition< MakePrimary, Primary >,
+       boost::statechart::transition< MakeStray, Stray >
        > reactions;
-      boost::statechart::result react(const MNotifyRec& infoevt);
     };
 
-    struct GetMissing;
-    struct GotLog : boost::statechart::event< GotLog > {
-      GotLog() : boost::statechart::event< GotLog >() {}
+    struct Peering;
+    struct WaitActingChange;
+    struct NeedActingChange : boost::statechart::event< NeedActingChange > {
+      NeedActingChange() : boost::statechart::event< NeedActingChange >() {}
+    };
+    struct Incomplete;
+    struct IsIncomplete : boost::statechart::event< IsIncomplete > {
+      IsIncomplete() : boost::statechart::event< IsIncomplete >() {}
     };
 
-    struct GetLog : boost::statechart::state< GetLog, Peering >, NamedState {
-      int newest_update_osd;
-      MOSDPGLog *msg;
-
-      GetLog(my_context ctx);
-      ~GetLog();
+    struct Primary : boost::statechart::state< Primary, Started, Peering >, NamedState {
+      Primary(my_context ctx);
       void exit();
 
       typedef boost::mpl::list <
-       boost::statechart::custom_reaction< MLogRec >,
-       boost::statechart::custom_reaction< GotLog >
+       boost::statechart::custom_reaction< ActMap >,
+       boost::statechart::custom_reaction< MNotifyRec >,
+       boost::statechart::custom_reaction< AdvMap >,
+       boost::statechart::transition< NeedActingChange, WaitActingChange >,
+       boost::statechart::transition< IsIncomplete, Incomplete >
        > reactions;
-      boost::statechart::result react(const MLogRec& logevt);
-      boost::statechart::result react(const GotLog&);
+      boost::statechart::result react(const ActMap&);
+      boost::statechart::result react(const AdvMap&);
+      boost::statechart::result react(const MNotifyRec&);
     };
 
-    struct WaitUpThru;
-
-    struct GetMissing : boost::statechart::state< GetMissing, Peering >, NamedState {
-      set<int> peer_missing_requested;
-
-      GetMissing(my_context ctx);
-      void exit();
-
+    struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>,
+                             NamedState {
       typedef boost::mpl::list <
+       boost::statechart::custom_reaction< AdvMap >,
        boost::statechart::custom_reaction< MLogRec >,
-       boost::statechart::transition< NeedUpThru, WaitUpThru >
+       boost::statechart::custom_reaction< MInfoRec >,
+       boost::statechart::custom_reaction< MNotifyRec >
        > reactions;
-      boost::statechart::result react(const MLogRec& logevt);
+      WaitActingChange(my_context ctx);
+      boost::statechart::result react(const AdvMap&);
+      boost::statechart::result react(const MLogRec&);
+      boost::statechart::result react(const MInfoRec&);
+      boost::statechart::result react(const MNotifyRec&);
+      void exit();
     };
 
-    struct WaitUpThru : boost::statechart::state< WaitUpThru, Peering >, NamedState {
-      WaitUpThru(my_context ctx);
+    struct Incomplete : boost::statechart::state< Incomplete, Primary>,
+                       NamedState {
+      Incomplete(my_context ctx);
       void exit();
-
-      typedef boost::mpl::list <
-       boost::statechart::custom_reaction< ActMap >,
-       boost::statechart::custom_reaction< MLogRec >
-       > reactions;
-      boost::statechart::result react(const ActMap& am);
-      boost::statechart::result react(const MLogRec& logrec);
     };
-
-
-    RecoveryMachine machine;
-    PG *pg;
-    RecoveryCtx *rctx;
-
-  public:
-    RecoveryState(PG *pg) : machine(this, pg), pg(pg), rctx(0) {
-      machine.initiate();
-    }
-
-    void handle_notify(int from, pg_info_t& i, RecoveryCtx *ctx);
-    void handle_info(int from, pg_info_t& i, RecoveryCtx *ctx);
-    void handle_log(int from,
-                   MOSDPGLog *msg,
-                   RecoveryCtx *ctx);
-    void handle_query(int from, const pg_query_t& q,
-                     epoch_t query_epoch,
-                     RecoveryCtx *ctx);
-    void handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap,
-                           vector<int>& newup, vector<int>& newacting, 
-                           RecoveryCtx *ctx);
-    void handle_activate_map(RecoveryCtx *ctx);
-    void handle_recovery_complete(RecoveryCtx *ctx);
-    void handle_create(RecoveryCtx *ctx);
-    void handle_loaded(RecoveryCtx *ctx);
-  } recovery_state;
-
-protected:
-
-  /*
-   * peer_info    -- projected (updates _before_ replicas ack)
-   * peer_missing -- committed (updates _after_ replicas ack)
-   */
-  
-  bool        need_up_thru;
-  set<int>    stray_set;   // non-acting osds that have PG data.
-  eversion_t  oldest_update; // acting: lowest (valid) last_update in active set
-  map<int,pg_info_t>    peer_info;   // info from peers (stray or prior)
-  map<int,pg_missing_t> peer_missing;
-  set<int>             peer_log_requested;  // logs i've requested (and start stamps)
-  set<int>             peer_missing_requested;
-  set<int>             stray_purged;  // i deleted these strays; ignore racing PGInfo from them
-  set<int>             peer_activated;
-
-  // primary-only, recovery-only state
-  set<int>             might_have_unfound;  // These osds might have objects on them
-                                           // which are unfound on the primary
-
-  epoch_t last_peering_reset;
-
-  /**
-   * BackfillInterval
-   *
-   * Represents the objects in a range [begin, end)
-   *
-   * Possible states:
-   * 1) begin == end == hobject_t() indicates the the interval is unpopulated
-   * 2) Else, objects contains all objects in [begin, end)
-   */
-  struct BackfillInterval {
-    // info about a backfill interval on a peer
-    map<hobject_t,eversion_t> objects;
-    hobject_t begin;
-    hobject_t end;
     
-    /// clear content
-    void clear() {
-      objects.clear();
-      begin = end = hobject_t();
-    }
-
-    void reset(hobject_t start) {
-      clear();
-      begin = end = start;
-    }
-
-    /// true if there are no objects in this interval
-    bool empty() {
-      return objects.empty();
-    }
-
-    /// true if interval extends to the end of the range
-    bool extends_to_end() {
-      return end == hobject_t::get_max();
-    }
-
-    /// Adjusts begin to the first object
-    void trim() {
-      if (objects.size())
-       begin = objects.begin()->first;
-      else
-       begin = end;
-    }
-
-    /// drop first entry, and adjust @begin accordingly
-    void pop_front() {
-      assert(!objects.empty());
-      objects.erase(objects.begin());
-      if (objects.empty())
-       begin = end;
-      else
-       begin = objects.begin()->first;
-    }
-  };
-  
-  BackfillInterval backfill_info;
-  BackfillInterval peer_backfill_info;
-  int backfill_target;
-
-  friend class OSD;
-
-public:
-  int get_backfill_target() const {
-    return backfill_target;
-  }
-
-protected:
-
-
-  // pg waiters
-  list<OpRequest*>            waiting_for_active;
-  list<OpRequest*>            waiting_for_all_missing;
-  map<hobject_t, list<OpRequest*> > waiting_for_missing_object,
-                                        waiting_for_degraded_object;
-  map<eversion_t,list<OpRequest*> > waiting_for_ondisk;
-  map<eversion_t,OpRequest*>   replay_queue;
+    struct GetInfo;
+    struct Active;
 
-  void requeue_object_waiters(map<hobject_t, list<OpRequest*> >& m);
+    struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState {
+      std::auto_ptr< PriorSet > prior_set;
 
-  // stats
-  Mutex pg_stats_lock;
-  bool pg_stats_valid;
-  pg_stat_t pg_stats_stable;
+      Peering(my_context ctx);
+      void exit();
 
-  // for ordering writes
-  ObjectStore::Sequencer osr;
+      typedef boost::mpl::list <
+       boost::statechart::transition< Activate, Active >,
+       boost::statechart::custom_reaction< AdvMap >
+       > reactions;
+      boost::statechart::result react(const AdvMap &advmap);
+    };
 
-  void update_stats();
-  void clear_stats();
+    struct Active : boost::statechart::state< Active, Primary >, NamedState {
+      Active(my_context ctx);
+      void exit();
 
-public:
-  void clear_primary_state();
+      typedef boost::mpl::list <
+       boost::statechart::custom_reaction< ActMap >,
+       boost::statechart::custom_reaction< AdvMap >,
+       boost::statechart::custom_reaction< MInfoRec >,
+       boost::statechart::custom_reaction< MNotifyRec >,
+       boost::statechart::custom_reaction< MLogRec >,
+       boost::statechart::custom_reaction< RecoveryComplete >
+       > reactions;
+      boost::statechart::result react(const ActMap&);
+      boost::statechart::result react(const AdvMap&);
+      boost::statechart::result react(const MInfoRec& infoevt);
+      boost::statechart::result react(const MNotifyRec& notevt);
+      boost::statechart::result react(const MLogRec& logevt);
+      boost::statechart::result react(const RecoveryComplete&);
+    };
 
- public:
-  bool is_acting(int osd) const { 
-    for (unsigned i=0; i<acting.size(); i++)
-      if (acting[i] == osd) return true;
-    return false;
-  }
-  bool is_up(int osd) const { 
-    for (unsigned i=0; i<up.size(); i++)
-      if (up[i] == osd) return true;
-    return false;
-  }
-  
-  bool is_all_uptodate() const;
+    struct ReplicaActive : boost::statechart::state< ReplicaActive, Started >, NamedState {
+      ReplicaActive(my_context ctx);
+      void exit();
 
-  void generate_past_intervals();
-  void trim_past_intervals();
-  void build_prior(std::auto_ptr<PriorSet> &prior_set);
+      typedef boost::mpl::list <
+       boost::statechart::custom_reaction< ActMap >,
+       boost::statechart::custom_reaction< MQuery >,
+       boost::statechart::custom_reaction< MInfoRec >,
+       boost::statechart::custom_reaction< MLogRec >
+       > reactions;
+      boost::statechart::result react(const MInfoRec& infoevt);
+      boost::statechart::result react(const MLogRec& logevt);
+      boost::statechart::result react(const ActMap&);
+      boost::statechart::result react(const MQuery&);
+    };
 
-  void remove_down_peer_info(const OSDMapRef osdmap);
+    struct Stray : boost::statechart::state< Stray, Started >, NamedState {
+      map<int, pair<pg_query_t, epoch_t> > pending_queries;
 
-  bool adjust_need_up_thru(const OSDMapRef osdmap);
+      Stray(my_context ctx);
+      void exit();
 
-  bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
-  virtual void mark_all_unfound_lost(int how) = 0;
+      typedef boost::mpl::list <
+       boost::statechart::custom_reaction< MQuery >,
+       boost::statechart::custom_reaction< MLogRec >,
+       boost::statechart::custom_reaction< MInfoRec >,
+       boost::statechart::custom_reaction< ActMap >,
+       boost::statechart::transition< Activate, ReplicaActive >
+       > reactions;
+      boost::statechart::result react(const MQuery& query);
+      boost::statechart::result react(const MLogRec& logevt);
+      boost::statechart::result react(const MInfoRec& infoevt);
+      boost::statechart::result react(const ActMap&);
+    };
 
-  bool calc_min_last_complete_ondisk() {
-    eversion_t min = last_complete_ondisk;
-    for (unsigned i=1; i<acting.size(); i++) {
-      if (peer_last_complete_ondisk.count(acting[i]) == 0)
-       return false;   // we don't have complete info
-      eversion_t a = peer_last_complete_ondisk[acting[i]];
-      if (a < min)
-       min = a;
-    }
-    if (min == min_last_complete_ondisk)
-      return false;
-    min_last_complete_ondisk = min;
-    return true;
-  }
+    struct GetLog;
 
-  virtual void calc_trim_to() = 0;
+    struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
+      set<int> peer_info_requested;
 
-  void proc_replica_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
-                       pg_missing_t& omissing, int from);
-  void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
-                      pg_missing_t& omissing, int from);
-  bool proc_replica_info(int from, pg_info_t &info);
-  bool merge_old_entry(ObjectStore::Transaction& t, pg_log_entry_t& oe);
-  void merge_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, int from);
-  void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead);
-  bool search_for_missing(const pg_info_t &oinfo, const pg_missing_t *omissing,
-                         int fromosd);
+      GetInfo(my_context ctx);
+      void exit();
+      void get_infos();
 
-  void check_for_lost_objects();
-  void forget_lost_objects();
+      typedef boost::mpl::list <
+       boost::statechart::transition< GotInfo, GetLog >,
+       boost::statechart::custom_reaction< MNotifyRec >
+       > reactions;
+      boost::statechart::result react(const MNotifyRec& infoevt);
+    };
 
-  void discover_all_missing(std::map< int, map<pg_t,pg_query_t> > &query_map);
-  
-  void trim_write_ahead();
+    struct GetMissing;
+    struct GotLog : boost::statechart::event< GotLog > {
+      GotLog() : boost::statechart::event< GotLog >() {}
+    };
 
-  map<int, pg_info_t>::const_iterator find_best_info(const map<int, pg_info_t> &infos) const;
-  bool calc_acting(int& newest_update_osd, vector<int>& want) const;
-  bool choose_acting(int& newest_update_osd);
-  void build_might_have_unfound();
-  void replay_queued_ops();
-  void activate(ObjectStore::Transaction& t, list<Context*>& tfin,
-               map< int, map<pg_t,pg_query_t> >& query_map,
-               map<int, MOSDPGInfo*> *activator_map=0);
-  void _activate_committed(epoch_t e);
-  void all_activated_and_committed();
+    struct GetLog : boost::statechart::state< GetLog, Peering >, NamedState {
+      int newest_update_osd;
+      MOSDPGLog *msg;
 
-  void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info);
+      GetLog(my_context ctx);
+      ~GetLog();
+      void exit();
 
-  bool have_unfound() const { 
-    return missing.num_missing() > missing_loc.size();
-  }
-  int get_num_unfound() const {
-    return missing.num_missing() - missing_loc.size();
-  }
+      typedef boost::mpl::list <
+       boost::statechart::custom_reaction< MLogRec >,
+       boost::statechart::custom_reaction< GotLog >
+       > reactions;
+      boost::statechart::result react(const MLogRec& logevt);
+      boost::statechart::result react(const GotLog&);
+    };
 
-  virtual void clean_up_local(ObjectStore::Transaction& t) = 0;
+    struct WaitUpThru;
 
-  virtual int start_recovery_ops(int max, RecoveryCtx *prctx) = 0;
+    struct GetMissing : boost::statechart::state< GetMissing, Peering >, NamedState {
+      set<int> peer_missing_requested;
 
-  void purge_strays();
+      GetMissing(my_context ctx);
+      void exit();
 
-  Context *finish_sync_event;
+      typedef boost::mpl::list <
+       boost::statechart::custom_reaction< MLogRec >,
+       boost::statechart::transition< NeedUpThru, WaitUpThru >
+       > reactions;
+      boost::statechart::result react(const MLogRec& logevt);
+    };
 
-  void finish_recovery(ObjectStore::Transaction& t, list<Context*>& tfin);
-  void _finish_recovery(Context *c);
-  void cancel_recovery();
-  void clear_recovery_state();
-  virtual void _clear_recovery_state() = 0;
-  void defer_recovery();
-  virtual void check_recovery_op_pulls(const OSDMapRef newmap) = 0;
-  void start_recovery_op(const hobject_t& soid);
-  void finish_recovery_op(const hobject_t& soid, bool dequeue=false);
+    struct WaitUpThru : boost::statechart::state< WaitUpThru, Peering >, NamedState {
+      WaitUpThru(my_context ctx);
+      void exit();
 
-  loff_t get_log_write_pos() {
-    return 0;
-  }
+      typedef boost::mpl::list <
+       boost::statechart::custom_reaction< ActMap >,
+       boost::statechart::custom_reaction< MLogRec >
+       > reactions;
+      boost::statechart::result react(const ActMap& am);
+      boost::statechart::result react(const MLogRec& logrec);
+    };
 
-  friend class C_OSD_RepModify_Commit;
 
+    RecoveryMachine machine;
+    PG *pg;
+    RecoveryCtx *rctx;
 
-  // -- scrub --
-  set<int> scrub_reserved_peers;
-  map<int,ScrubMap> scrub_received_maps;
-  bool finalizing_scrub; 
-  bool scrub_reserved, scrub_reserve_failed;
-  int scrub_waiting_on;
-  epoch_t scrub_epoch_start;
-  ScrubMap primary_scrubmap;
-  MOSDRepScrub *active_rep_scrub;
+  public:
+    RecoveryState(PG *pg) : machine(this, pg), pg(pg), rctx(0) {
+      machine.initiate();
+    }
 
-  void repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer, int ok_peer);
-  bool _compare_scrub_objects(ScrubMap::object &auth,
-                             ScrubMap::object &candidate,
-                             ostream &errorstream);
-  void _compare_scrubmaps(const map<int,ScrubMap*> &maps,  
-                         map<hobject_t, set<int> > &missing,
-                         map<hobject_t, set<int> > &inconsistent,
-                         map<hobject_t, int> &authoritative,
-                         ostream &errorstream);
-  void scrub();
-  void scrub_finalize();
-  void scrub_clear_state();
-  bool scrub_gather_replica_maps();
-  void _scan_list(ScrubMap &map, vector<hobject_t> &ls);
-  void _request_scrub_map(int replica, eversion_t version);
-  void build_scrub_map(ScrubMap &map);
-  void build_inc_scrub_map(ScrubMap &map, eversion_t v);
-  virtual int _scrub(ScrubMap &map, int& errors, int& fixed) { return 0; }
-  void clear_scrub_reserved();
-  void scrub_reserve_replicas();
-  void scrub_unreserve_replicas();
-  bool scrub_all_replicas_reserved() const;
-  bool sched_scrub();
+    void handle_notify(int from, pg_info_t& i, RecoveryCtx *ctx);
+    void handle_info(int from, pg_info_t& i, RecoveryCtx *ctx);
+    void handle_log(int from,
+                   MOSDPGLog *msg,
+                   RecoveryCtx *ctx);
+    void handle_query(int from, const pg_query_t& q,
+                     epoch_t query_epoch,
+                     RecoveryCtx *ctx);
+    void handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap,
+                           vector<int>& newup, vector<int>& newacting, 
+                           RecoveryCtx *ctx);
+    void handle_activate_map(RecoveryCtx *ctx);
+    void handle_recovery_complete(RecoveryCtx *ctx);
+    void handle_create(RecoveryCtx *ctx);
+    void handle_loaded(RecoveryCtx *ctx);
+  } recovery_state;
 
-  void replica_scrub(class MOSDRepScrub *op);
-  void sub_op_scrub_map(OpRequest *op);
-  void sub_op_scrub_reserve(OpRequest *op);
-  void sub_op_scrub_reserve_reply(OpRequest *op);
-  void sub_op_scrub_unreserve(OpRequest *op);
-  void sub_op_scrub_stop(OpRequest *op);
 
  public:  
   PG(OSD *o, PGPool *_pool, pg_t p, const hobject_t& loid, const hobject_t& ioid) : 
@@ -1231,7 +1235,6 @@ public:
     waiting_on_backfill(0),
     role(0),
     state(0),
-    recovery_state(this),
     need_up_thru(false),
     last_peering_reset(0),
     backfill_target(-1),
@@ -1241,7 +1244,8 @@ public:
     finalizing_scrub(false),
     scrub_reserved(false), scrub_reserve_failed(false),
     scrub_waiting_on(0),
-    active_rep_scrub(0)
+    active_rep_scrub(0),
+    recovery_state(this)
   {
     pool->get();
   }