ErasureCode: update PGBackend description

author Loic Dachary <loic@dachary.org>

Wed, 2 Oct 2013 13:06:27 +0000 (15:06 +0200)

committer Loic Dachary <loic@dachary.org>

Wed, 2 Oct 2013 13:58:01 +0000 (15:58 +0200)
author Loic Dachary <loic@dachary.org>
Wed, 2 Oct 2013 13:06:27 +0000 (15:06 +0200)
committer Loic Dachary <loic@dachary.org>
Wed, 2 Oct 2013 13:58:01 +0000 (15:58 +0200)
diff --git a/doc/dev/osd_internals/erasure_coding.rst b/doc/dev/osd_internals/erasure_coding.rst

index d3f19b6fb8e51a22cf4b23de026a14b17d0da1e0..0586c46c3bb64b438b9b90e3da8cda155d5610ff 100644 (file)
--- a/doc/dev/osd_internals/erasure_coding.rst
+++ b/doc/dev/osd_internals/erasure_coding.rst
@@ -66,4 +66,3 @@ Table of content
     Developer notes <erasure_coding/developer_notes>
     Jerasure plugin <erasure_coding/jerasure>
     High level design document <erasure_coding/pgbackend>
-   Draft PGBackend.h header <erasure_coding/PGBackend-h>
diff --git a/doc/dev/osd_internals/erasure_coding/PGBackend-h.rst b/doc/dev/osd_internals/erasure_coding/PGBackend-h.rst

deleted file mode 100644 (file)

index b39cdb0..0000000
--- a/doc/dev/osd_internals/erasure_coding/PGBackend-h.rst
+++ /dev/null
@@ -1,156 +0,0 @@
-===========
-PGBackend.h
-===========
-
-Work in progress:
-::
- 
- /**
-  * PGBackend
-  *
-  * PGBackend defines an interface for logic handling IO and
-  * replication on RADOS objects.  The PGBackend implementation
-  * is responsible for:
-  *
-  * 1) Handling client operations
-  * 2) Handling object recovery
-  * 3) Handling object access
-  */
- class PGBackend {
- public:       
-   /// IO
- 
-   /// Perform write
-   int perform_write(
-     const vector<OSDOp> &ops,  ///< [in] ops to perform
-     Context *onreadable,       ///< [in] called when readable on all reaplicas
-     Context *onreadable,       ///< [in] called when durable on all replicas
-     ) = 0; ///< @return 0 or error
- 
-   /// Attempt to roll back a log entry
-   int try_rollback(
-     const pg_log_entry_t &entry, ///< [in] entry to roll back
-     ObjectStore::Transaction *t  ///< [out] transaction
-     ) = 0; ///< @return 0 on success, -EINVAL if it can't be rolled back
- 
-   /// Perform async read, oncomplete is called when ops out_bls are filled in
-   int perform_read(
-     vector<OSDOp> &ops,        ///< [in, out] ops
-     Context *oncomplete        ///< [out] called with r code
-     ) = 0; ///< @return 0 or error
- 
-   /// Peering
- 
-   /**
-    * have_enough_infos
-    *
-    * Allows PGBackend implementation to ensure that enough peers have
-    * been contacted to satisfy its requirements.
-    *
-    * TODO: this interface should yield diagnostic info about which infos
-    * are required
-    */
-   bool have_enough_infos(
-     const map<epoch_t, pg_interval_t> &past_intervals,      ///< [in] intervals
-     const map<chunk_id_t, map<int, pg_info_t> > &peer_infos ///< [in] infos
-     ) = 0; ///< @return true if we can continue peering
- 
-   /**
-    * choose_acting
-    *
-    * Allows PGBackend implementation to select the acting set based on the
-    * received infos
-    *
-    * @return False if the current acting set is inadequate, *req_acting will
-    *         be filled in with the requested new acting set.  True if the
-    *         current acting set is adequate, *auth_log will be filled in
-    *         with the correct location of the authoritative log.
-    */
-   bool choose_acting(
-     const map<int, pg_info_t> &peer_infos, ///< [in] received infos
-     int *auth_log,                         ///< [out] osd with auth log
-     vector<int> *req_acting                ///< [out] requested acting set
-     ) = 0;
- 
-   /// Scrub
- 
-   /// scan
-   int scan(
-     const hobject_t &start, ///< [in] scan objects >= start
-     const hobject_t &up_to, ///< [in] scan objects < up_to
-     vector<hobject_t> *out  ///< [out] objects returned
-     ) = 0; ///< @return 0 or error
- 
-   /// stat (TODO: ScrubMap::object needs to have PGBackend specific metadata)
-   int scrub(
-     const hobject_t &to_stat, ///< [in] object to stat
-     bool deep,                ///< [in] true if deep scrub
-     ScrubMap::object *o       ///< [out] result
-     ) = 0; ///< @return 0 or error
- 
-   /**
-    * compare_scrub_maps
-    *
-    * @param inconsistent [out] map of inconsistent pgs to pair<correct, incorrect>
-    * @param errstr [out] stream of text about inconsistencies for user
-    *                     perusal
-    *
-    * TODO: this interface doesn't actually make sense...
-    */
-   void compare_scrub_maps(
-     const map<int, ScrubMap> &maps, ///< [in] maps to compare
-     bool deep,                      ///< [in] true if scrub is deep
-     map<hobject_t, pair<set<int>, set<int> > > *inconsistent,
-     std:ostream *errstr
-     ) = 0;
- 
-   /// Recovery
- 
-   /**
-    * might_have_unrecoverable
-    *
-    * @param missing [in] missing,info gathered so far (must include acting)
-    * @param intervals [in] past intervals
-    * @param should_query [out] pair<int, cpg_t> shards to query
-    */
-   void might_have_unrecoverable(
-     const map<chunk_id_t, map<int, pair<pg_info_t, pg_missing_t> > &missing,
-     const map<epoch_t, pg_interval_t> &past_intervals,
-     set<pair<int, cpg_t> > *should_query
-     ) = 0;
- 
-   /**
-    * might_have_unfound
-    *
-    * @param missing [in] missing,info gathered so far (must include acting)
-    */
-   bool recoverable(
-     const map<chunk_id_t, map<int, pair<pg_info_t, pg_missing_t> > &missing,
-     const hobject_t &hoid ///< [in] object to check
-     ) = 0; ///< @return true if object can be recovered given missing
- 
-   /**
-    * recover_object
-    *
-    * Triggers a recovery operation on the specified hobject_t
-    * onreadable must be called before onwriteable
-    *
-    * @param missing [in] set of info, missing pairs for queried nodes
-    */
-   void recover_object(
-     const hobject_t &hoid, ///< [in] object to recover
-     const map<chunk_id_t, map<int, pair<pg_info_t, pg_missing_t> > &missing
-     Context *onreadable,   ///< [in] called when object can be read
-     Context *onwriteable   ///< [in] called when object can be written
-     ) = 0;
- 
-   /// Backfill
- 
-   /// choose_backfill
-   void choose_backfill(
-     const map<chunk_id_t, map<int, pg_info_t> > &peer_infos ///< [in] infos
-     const vector<int> &acting, ///< [in] acting set
-     const vector<int> &up,     ///< [in] up set
-     set<int> *to_backfill      ///< [out] osds to backfill
-     ) = 0;
- };
diff --git a/doc/dev/osd_internals/erasure_coding/developer_notes.rst b/doc/dev/osd_internals/erasure_coding/developer_notes.rst

index 568b2b4634aa3b9f9cd783d6f92a5ef1d1bff6be..454f087fe53f0237c9d2e7315573caa0eba05fcd 100644 (file)
--- a/doc/dev/osd_internals/erasure_coding/developer_notes.rst
+++ b/doc/dev/osd_internals/erasure_coding/developer_notes.rst
@@ -495,8 +495,7 @@ registry. The `ErasureCodePluginExample <https://github.com/ceph/ceph/blob/08a97
  
  The *ErasureCodePlugin* derived object must provide a factory method
  from which the concrete implementation of the *ErasureCodeInterface*
-object can be generated. The `ErasureCodePluginExample <https://github.com/ceph/ceph/blob/08a97ae45f4df58a6a8ea8a6400934d860cf5eb4/src/test/osd/ErasureCodePluginExample.cc#L22>`_ plugin
-reads:
+object can be generated. The `ErasureCodePluginExample plugin <https://github.com/ceph/ceph/blob/08a97ae45f4df58a6a8ea8a6400934d860cf5eb4/src/test/osd/ErasureCodePluginExample.cc#L22>`_ reads:
  
  ::
   
diff --git a/doc/dev/osd_internals/erasure_coding/pgbackend.rst b/doc/dev/osd_internals/erasure_coding/pgbackend.rst

index c16354f5116984028251d8cf3fbeafedd36246b7..43415ba4f7e7beba4476503ca529f815ae14d053 100644 (file)
--- a/doc/dev/osd_internals/erasure_coding/pgbackend.rst
+++ b/doc/dev/osd_internals/erasure_coding/pgbackend.rst
@@ -2,14 +2,13 @@
  PG Backend Proposal
  ===================
  
-See also `PGBackend.h <../PGBackend-h>`_
-
  Motivation
  ----------
  
-The purpose of the PG Backend interface is to abstract over the
-differences between replication and erasure coding as failure recovery
-mechanisms.
+The purpose of the `PG Backend interface
+<https://github.com/ceph/ceph/blob/a287167cf8625165249b7636540591aefc0a693d/src/osd/PGBackend.h>`_
+is to abstract over the differences between replication and erasure
+coding as failure recovery mechanisms.
  
  Much of the existing PG logic, particularly that for dealing with
  peering, will be common to each.  With both schemes, a log of recent
@@ -34,12 +33,12 @@ and erasure coding which PGBackend must abstract over:
     positions are not interchangeable.  In particular, it might make
     sense for a single OSD to hold more than 1 PG copy for different
     acting set positions.
-5. Selection of a pgtemp for backfill may difer between replicated
+5. Selection of a pgtemp for backfill may differ between replicated
     and erasure coded backends.
  6. The set of necessary osds from a particular interval required to
-   to continue peering may difer between replicated and erasure
+   continue peering may differ between replicated and erasure
     coded backends.
-7. The selection of the authoritative log may difer between replicated
+7. The selection of the authoritative log may differ between replicated
     and erasure coded backends.
  
  Client Writes
@@ -78,8 +77,9 @@ Core Changes:
  - Current code should be adapted to use and rollback as appropriate
    APPEND, DELETE, (SET|RM)ATTR log entries.
  - The filestore needs to be able to deal with multiply versioned
-  hobjects.  This probably means adapting the filestore internally to
-  use a ghobject which is basically a tuple<hobject_t, gen_t,
+  hobjects.  This means adapting the filestore internally to
+  use a `ghobject <https://github.com/ceph/ceph/blob/aba6efda13eb6ab4b96930e9cc2dbddebbe03f26/src/common/hobject.h#L193>`_ 
+  which is basically a tuple<hobject_t, gen_t,
    shard_t>.  The gen_t + shard_t need to be included in the on-disk
    filename.  gen_t is a unique object identifier to make sure there
    are no name collisions when object N is created +
@@ -114,7 +114,7 @@ divergent objects.  Thus, we must choose the *oldest* last_update from
  the last interval which went active in order to minimize the number of
  divergent objects.
  
-The dificulty is that the current code assumes that as long as it has
+The difficulty is that the current code assumes that as long as it has
  an info from at least 1 osd from the prior interval, it can complete
  peering.  In order to ensure that we do not end up with an
  unrecoverably divergent object, a K+M erasure coded PG must hear from at
@@ -161,7 +161,7 @@ Client Reads
  ------------
  
  Reads with the replicated strategy can always be satisfied
-syncronously out of the primary osd.  With an erasure coded strategy,
+synchronously out of the primary osd.  With an erasure coded strategy,
  the primary will need to request data from some number of replicas in
  order to satisfy a read.  The perform_read() interface for PGBackend
  therefore will be async.
@@ -192,7 +192,7 @@ include the chunk id in the object key.
  Core changes:
  
  - The filestore `ghobject_t needs to also include a chunk id
-  <http://tracker.ceph.com/issues/5862>`_ making it more like
+  <https://github.com/ceph/ceph/blob/aba6efda13eb6ab4b96930e9cc2dbddebbe03f26/src/common/hobject.h#L193>`_ making it more like
    tuple<hobject_t, gen_t, shard_t>.
  - coll_t needs to include a shard_t.
  - The `OSD pg_map and similar pg mappings need to work in terms of a
@@ -260,7 +260,7 @@ Core changes:
  Recovery
  --------
  
-See `Issue #5857`_. The logic for recovering an object depends on the backend.  With
+The logic for recovering an object depends on the backend.  With
  the current replicated strategy, we first pull the object replica
  to the primary and then concurrently push it out to the replicas.
  With the erasure coded strategy, we probably want to read the
@@ -270,7 +270,7 @@ and push out the replacement chunks concurrently.
  Another difference is that objects in erasure coded pg may be
  unrecoverable without being unfound.  The "unfound" concept
  should probably then be renamed to unrecoverable.  Also, the
-PGBackend impementation will have to be able to direct the search
+PGBackend implementation will have to be able to direct the search
  for pg replicas with unrecoverable object chunks and to be able
  to determine whether a particular object is recoverable.
  
@@ -281,9 +281,11 @@ Core changes:
  
  PGBackend interfaces:
  
-- might_have_unrecoverable()
-- recoverable()
-- recover_object()
+- `on_local_recover_start <https://github.com/ceph/ceph/blob/a287167cf8625165249b7636540591aefc0a693d/src/osd/PGBackend.h#L46>`_
+- `on_local_recover <https://github.com/ceph/ceph/blob/a287167cf8625165249b7636540591aefc0a693d/src/osd/PGBackend.h#L52>`_
+- `on_global_recover <https://github.com/ceph/ceph/blob/a287167cf8625165249b7636540591aefc0a693d/src/osd/PGBackend.h#L64>`_
+- `on_peer_recover <https://github.com/ceph/ceph/blob/a287167cf8625165249b7636540591aefc0a693d/src/osd/PGBackend.h#L69>`_
+- `begin_peer_recover <https://github.com/ceph/ceph/blob/a287167cf8625165249b7636540591aefc0a693d/src/osd/PGBackend.h#L76>`_
  
  Backfill
  --------
@@ -316,6 +318,4 @@ PGBackend interfaces:
  - choose_backfill(): allows the implementation to determine which osds
    should be backfilled in a particular interval.
  
-
-.. _Issue #5857: http://tracker.ceph.com/issues/5857
-.. _Issue #5856: http://tracker.ceph.com/issues/5856
-\ No newline at end of file
+.. _Issue #5856: http://tracker.ceph.com/issues/5856
diff --git a/doc/dev/osd_internals/erasure_coding/recovery.rst b/doc/dev/osd_internals/erasure_coding/recovery.rst

deleted file mode 100644 (file)

index 793a5b0..0000000
--- a/doc/dev/osd_internals/erasure_coding/recovery.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-===================
-PGBackend Recovery
-===================
-
author	Loic Dachary <loic@dachary.org>
	Wed, 2 Oct 2013 13:06:27 +0000 (15:06 +0200)
committer	Loic Dachary <loic@dachary.org>
	Wed, 2 Oct 2013 13:58:01 +0000 (15:58 +0200)
doc/dev/osd_internals/erasure_coding.rst		patch \| blob \| history
doc/dev/osd_internals/erasure_coding/PGBackend-h.rst	[deleted file]	patch \| blob \| history
doc/dev/osd_internals/erasure_coding/developer_notes.rst		patch \| blob \| history
doc/dev/osd_internals/erasure_coding/pgbackend.rst		patch \| blob \| history
doc/dev/osd_internals/erasure_coding/recovery.rst	[deleted file]	patch \| blob \| history