git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PG,PGLog: replace _merge_old_entry with _merge_object_divergent_entries
author Samuel Just <sam.just@inktank.com>
Sun, 2 Mar 2014 21:38:12 +0000 (13:38 -0800)
committer Samuel Just <sam.just@inktank.com>
Tue, 4 Mar 2014 00:05:12 +0000 (16:05 -0800)
The _merge_old_entry structure had trouble distinguishing between the
following cases:

missing: foo, 1,1
merge_old_entry modify 1,1 0,0
merge_old_entry modify 1,2 1,1

and
merge_old_entry modify 1,2 1,1

In the first case, we should end up with foo removed from missing
at the end.  In the second, we need foo added to missing at 1,1.
It's far simpler to present all of the divergent entries for a single
object at once.

Signed-off-by: Samuel Just <sam.just@inktank.com>
src/osd/PGLog.cc
src/osd/PGLog.h

index 113bdd213ac52c59af4a364ea4902210e03a5bb9..e563b4e4c9957116159d0f77fd0b0ba52825b954 100644 (file)
@@ -234,167 +234,174 @@ void PGLog::proc_replica_log(
   }
 }
 
-/*
- * merge an old (possibly divergent) log entry into the new log.  this 
- * happens _after_ new log items have been assimilated.  thus, we assume
- * the index already references newer entries (if present), and missing
- * has been updated accordingly.
+/**
+ * _merge_object_divergent_entries
  *
- * return true if entry is not divergent.
+ * There are 5 distinct cases:
+ * 1) There is a more recent update: in this case we assume we adjusted the
+ *    store and missing during merge_log
+ * 2) The first entry in the divergent sequence is a create.  This might
+ *    either be because the object is a clone or because prior_version is
+ *    eversion_t().  In this case the object does not exist and we must
+ *    adjust missing and the store to match.
+ * 3) We are currently missing the object.  In this case, we adjust the
+ *    missing to our prior_version taking care to add a divergent_prior
+ *    if necessary
+ * 4) We can rollback all of the entries.  In this case, we do so using
+ *    the rollbacker and return -- the object does not go into missing.
+ * 5) We cannot rollback at least 1 of the entries.  In this case, we
+ *    clear the object out of the store and add a missing entry at
+ *    prior_version taking care to add a divergent_prior if
+ *    necessary.
  */
-bool PGLog::_merge_old_entry(
-  ObjectStore::Transaction& t,
-  const pg_log_entry_t &oe,
-  const pg_info_t& info,
-  pg_missing_t &missing,
+void PGLog::_merge_object_divergent_entries(
+  const IndexedLog &log,
+  const hobject_t &hoid,
+  const list<pg_log_entry_t> &entries,
+  const pg_info_t &info,
   eversion_t olog_can_rollback_to,
+  pg_missing_t &missing,
   boost::optional<pair<eversion_t, hobject_t> > *new_divergent_prior,
-  LogEntryHandler *rollbacker) const
+  LogEntryHandler *rollbacker
+  )
 {
-  if (oe.soid > info.last_backfill) {
-    dout(20) << "merge_old_entry  had " << oe
-            << " : beyond last_backfill" << dendl;
-    return false;
+  dout(10) << __func__ << ": merging hoid " << hoid
+          << " entries: " << entries << dendl;
+
+  if (hoid > info.last_backfill) {
+    dout(10) << __func__ << ": hoid " << hoid << " after last_backfill"
+            << dendl;
+    return;
   }
-  ceph::unordered_map<hobject_t, pg_log_entry_t*>::const_iterator objiter =
-    log.objects.find(oe.soid);
-  if (objiter != log.objects.end()) {
-    pg_log_entry_t &ne = *(objiter->second); // new(er?) entry
-    
-    if (ne.version > oe.version) {
-      dout(20) << "merge_old_entry  had " << oe
-              << " new " << ne << " : older, missing" << dendl;
-      return false;
-    }
-    if (ne.version == oe.version) {
-      dout(20) << "merge_old_entry  had " << oe
-              << " new " << ne << " : same" << dendl;
-      return true;
+
+  // entries is non-empty
+  assert(!entries.empty());
+  eversion_t last;
+  for (list<pg_log_entry_t>::const_iterator i = entries.begin();
+       i != entries.end();
+       ++i) {
+    // all entries are on hoid
+    assert(i->soid == hoid);
+    if (i != entries.begin() && i->prior_version != eversion_t()) {
+      // in increasing order of version
+      assert(i->version > last);
+      // prior_version correct
+      assert(i->prior_version == last);
     }
+    last = i->version;
 
-    if (missing.is_missing(oe.soid)) {
-      if (!ne.is_delete()) {
-       dout(20) << __func__ << ": ne.version < oe.version && already missing, "
-                << "revising missing need" << dendl;
-       if (rollbacker) {
-         rollbacker->cant_rollback(oe);
-       }
-       missing.revise_need(ne.soid, ne.version);
-      } else {
-       dout(20) << __func__ << ": ne.version < oe.version && already missing, "
-                << "ne is delete, clearing missing need" << dendl;
-       if (rollbacker) {
-         rollbacker->remove(oe.soid);
-         rollbacker->cant_rollback(oe);
-       }
-       missing.rm(oe.soid, oe.version);
-      }
-    } else if (oe.mod_desc.can_rollback() && oe.version > olog_can_rollback_to) {
-      dout(20) << __func__ << ": ne.version < oe.version && can rollback, "
-              << "rolling back " << oe << dendl;
-      if (rollbacker)
-       rollbacker->rollback(oe);
-    } else if (oe.is_delete()) {
-      if (ne.is_delete()) {
-       // old and new are delete
-       dout(20) << "merge_old_entry  had " << oe
-                << " new " << ne << " : both deletes" << dendl;
-      } else {
-       // old delete, new update.
-       dout(20) << "merge_old_entry  had " << oe
-                << " new " << ne << " : missing" << dendl;
-       missing.revise_need(ne.soid, ne.version);
-       if (rollbacker)
-         rollbacker->cant_rollback(oe);
-      }
+    if (rollbacker)
+      rollbacker->trim(*i);
+  }
+
+  const eversion_t prior_version = entries.begin()->prior_version;
+  const eversion_t first_divergent_update = entries.begin()->version;
+  const eversion_t last_divergent_update = entries.rbegin()->version;
+  const bool object_not_in_store =
+    !missing.is_missing(hoid) &&
+    entries.rbegin()->is_delete();
+  dout(10) << __func__ << ": hoid " << hoid
+          << " prior_version: " << prior_version
+          << " first_divergent_update: " << first_divergent_update
+          << " last_divergent_update: " << last_divergent_update
+          << dendl;
+
+  ceph::unordered_map<hobject_t, pg_log_entry_t*>::const_iterator objiter =
+    log.objects.find(hoid);
+  if (objiter != log.objects.end() &&
+      objiter->second->version >= first_divergent_update) {
+    /// Case 1)
+    assert(objiter->second->version > last_divergent_update);
+
+    dout(10) << __func__ << ": more recent entry found: "
+            << *objiter->second << ", already merged" << dendl;
+
+    // ensure missing has been updated appropriately
+    if (objiter->second->is_update()) {
+      assert(missing.is_missing(hoid) &&
+            missing.missing[hoid].need == objiter->second->version);
     } else {
-      if (ne.is_delete()) {
-       // old update, new delete
-       dout(20) << "merge_old_entry  had " << oe
-                << " new " << ne << " : new delete supercedes" << dendl;
-       if (rollbacker) {
-         rollbacker->remove(oe.soid);
-         rollbacker->cant_rollback(oe);
-       }
-       assert(!missing.is_missing(oe.soid));
-      } else {
-       // old update, new update
-       dout(20) << "merge_old_entry  had " << oe
-                << " new " << ne << " : new item supercedes" << dendl;
-       missing.revise_need(ne.soid, ne.version);
-       if (rollbacker)
-         rollbacker->cant_rollback(oe);
-      }
+      assert(!missing.is_missing(hoid));
     }
-  } else if (oe.op == pg_log_entry_t::CLONE) {
-    assert(oe.soid.snap != CEPH_NOSNAP);
-    dout(20) << "merge_old_entry  had " << oe
-            << ", clone with no non-divergent log entries, "
-            << "deleting" << dendl;
-    if (missing.is_missing(oe.soid)) {
-      missing.rm(oe.soid, missing.missing[oe.soid].need);
+    missing.revise_have(hoid, eversion_t());
+    if (rollbacker && !object_not_in_store)
+      rollbacker->remove(hoid);
+    return;
+  }
+
+  dout(10) << __func__ << ": hoid " << hoid
+          <<" has no more recent entries in log" << dendl;
+  if (prior_version == eversion_t() || entries.front().is_clone()) {
+    /// Case 2)
+    dout(10) << __func__ << ": hoid " << hoid
+            << " prior_version or op type indicates creation, deleting"
+            << dendl;
+    if (missing.is_missing(hoid))
+      missing.rm(missing.missing.find(hoid));
+    if (rollbacker && !object_not_in_store)
+      rollbacker->remove(hoid);
+    return;
+  }
+
+  if (missing.is_missing(hoid)) {
+    /// Case 3)
+    dout(10) << __func__ << ": hoid " << hoid
+            << " missing, adjusting missing version" << dendl;
+    missing.revise_need(hoid, prior_version);
+    if (prior_version <= info.log_tail) {
+      dout(10) << __func__ << ": hoid " << hoid
+              << " prior_version " << prior_version << " <= info.log_tail "
+              << info.log_tail << dendl;
+      if (new_divergent_prior)
+       *new_divergent_prior = make_pair(prior_version, hoid);
     }
+    return;
+  }
 
-    if (oe.mod_desc.can_rollback() && oe.version > olog_can_rollback_to) {
-      dout(20) << __func__ << ": rolling back " << oe << dendl;
-      if (rollbacker)
-       rollbacker->rollback(oe);
-    } else {
-      dout(20) << __func__ << ": had " << oe << " deleting" << dendl;
+  dout(10) << __func__ << ": hoid " << hoid
+          << " must be rolled back or recovered, attempting to rollback"
+          << dendl;
+  bool can_rollback = true;
+  /// Distinguish between 4) and 5)
+  for (list<pg_log_entry_t>::const_reverse_iterator i = entries.rbegin();
+       i != entries.rend();
+       ++i) {
+    if (!i->mod_desc.can_rollback() || i->version <= olog_can_rollback_to) {
+      dout(10) << __func__ << ": hoid " << hoid << " cannot rollback "
+              << *i << dendl;
+      can_rollback = false;
+      break;
+    }
+  }
+
+  if (can_rollback) {
+    /// Case 4)
+    for (list<pg_log_entry_t>::const_reverse_iterator i = entries.rbegin();
+        i != entries.rend();
+        ++i) {
+      assert(i->mod_desc.can_rollback() && i->version > olog_can_rollback_to);
+      dout(10) << __func__ << ": hoid " << hoid
+              << " rolling back " << *i << dendl;
       if (rollbacker)
-       rollbacker->remove(oe.soid);
+       rollbacker->rollback(*i);
     }
-  } else if (oe.prior_version > info.log_tail && missing.is_missing(oe.soid)) {
-    /**
-     * oe.prior_version is a previously divergent log entry
-     * oe.soid must have already been handled and the missing
-     * set updated appropriately
-     */
-    dout(20) << "merge_old_entry  had oe " << oe
-            << " with divergent prior_version " << oe.prior_version
-            << " oe.soid " << oe.soid
-            << " must already have been merged" << dendl;
+    dout(10) << __func__ << ": hoid " << hoid << " rolled back" << dendl;
+    return;
   } else {
-    if (missing.is_missing(oe.soid)) {
-      if (oe.prior_version > eversion_t()) {
-       dout(20) << __func__ << ": oe.prior_version > 0 && already missing, "
-                << "revising missing need " << oe << dendl;
-       if (new_divergent_prior)
-         *new_divergent_prior = make_pair(oe.prior_version, oe.soid);
-       missing.revise_need(oe.soid, oe.prior_version);
-      } else {
-       dout(20) << __func__ << ": oe.prior_version == 0 && already missing, "
-                << "removing missing " << oe << dendl;
-       missing.rm(oe.soid, missing.missing[oe.soid].need);
-       if (rollbacker)
-         rollbacker->remove(oe.soid);
-      }
-      if (rollbacker)
-       rollbacker->cant_rollback(oe);
-    } else if (oe.mod_desc.can_rollback() && oe.version > olog_can_rollback_to) {
-      dout(20) << __func__ << ": rolling back " << oe << dendl;
-      if (rollbacker)
-       rollbacker->rollback(oe);
-    } else {
-      if (!oe.is_delete()) {
-       if (rollbacker)
-         rollbacker->remove(oe.soid);
-       dout(20) << __func__ << ": had " << oe << " deleting" << dendl;
-      }
-      dout(20) << "merge_old_entry  had " << oe << " updating missing to "
-              << oe.prior_version << dendl;
-      if (oe.prior_version > eversion_t()) {
-       if (new_divergent_prior)
-         *new_divergent_prior = make_pair(oe.prior_version, oe.soid);
-       missing.revise_need(oe.soid, oe.prior_version);
-       if (rollbacker)
-         rollbacker->cant_rollback(oe);
-      } else {
-       assert(!missing.is_missing(oe.soid));
-      }
+    /// Case 5)
+    dout(10) << __func__ << ": hoid " << hoid << " cannot roll back, "
+            << "removing and adding to missing" << dendl;
+    if (rollbacker && !object_not_in_store)
+      rollbacker->remove(hoid);
+    missing.add(hoid, prior_version, eversion_t());
+    if (prior_version <= info.log_tail) {
+      dout(10) << __func__ << ": hoid " << hoid
+              << " prior_version " << prior_version << " <= info.log_tail "
+              << info.log_tail << dendl;
+      if (new_divergent_prior)
+       *new_divergent_prior = make_pair(prior_version, hoid);
     }
   }
-  return false;
 }
 
 /**
@@ -438,11 +445,22 @@ void PGLog::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead
     info.last_complete = newhead;
 
   log.index();
-  for (list<pg_log_entry_t>::iterator d = divergent.begin();
-       d != divergent.end();
-       ++d) {
-    merge_old_entry(t, *d, info, rollbacker);
-    rollbacker->trim(*d);
+
+  map<eversion_t, hobject_t> new_priors;
+  _merge_divergent_entries(
+    log,
+    divergent,
+    info,
+    log.can_rollback_to,
+    missing,
+    &new_priors,
+    rollbacker);
+  for (map<eversion_t, hobject_t>::iterator i = new_priors.begin();
+       i != new_priors.end();
+       ++i) {
+    add_divergent_prior(
+      i->first,
+      i->second);
   }
 
   if (info.last_update < log.can_rollback_to)
@@ -578,12 +596,21 @@ void PGLog::merge_log(ObjectStore::Transaction& t,
     info.last_user_version = oinfo.last_user_version;
     info.purged_snaps = oinfo.purged_snaps;
 
-    // process divergent items
-    for (list<pg_log_entry_t>::iterator d = divergent.begin();
-        d != divergent.end();
-        ++d) {
-      merge_old_entry(t, *d, info, rollbacker);
-      rollbacker->trim(*d);
+    map<eversion_t, hobject_t> new_priors;
+    _merge_divergent_entries(
+      log,
+      divergent,
+      info,
+      log.can_rollback_to,
+      missing,
+      &new_priors,
+      rollbacker);
+    for (map<eversion_t, hobject_t>::iterator i = new_priors.begin();
+        i != new_priors.end();
+        ++i) {
+      add_divergent_prior(
+       i->first,
+       i->second);
     }
 
     // We cannot rollback into the new log entries
index 9d994eeebb51b06fe4faa99a746d8a5d6318b773..9309841326d6ed519bddeb8fb29816c752e11e14 100644 (file)
@@ -34,8 +34,6 @@ struct PGLog {
       const hobject_t &hoid) = 0;
     virtual void trim(
       const pg_log_entry_t &entry) = 0;
-    virtual void cant_rollback(
-      const pg_log_entry_t &entry) = 0;
     virtual ~LogEntryHandler() {}
   };
 
@@ -388,30 +386,87 @@ public:
                        pg_missing_t& omissing, pg_shard_t from) const;
 
 protected:
-  bool _merge_old_entry(
-    ObjectStore::Transaction& t,
-    const pg_log_entry_t &oe,
-    const pg_info_t& info,
-    pg_missing_t &missing,
-    eversion_t olog_can_rollback_to,
+  static void split_by_object(
+    list<pg_log_entry_t> &entries,
+    map<hobject_t, list<pg_log_entry_t> > *out_entries) {
+    while (!entries.empty()) {
+      list<pg_log_entry_t> &out_list = (*out_entries)[entries.front().soid];
+      out_list.splice(out_list.end(), entries, entries.begin());
+    }
+  }
+
+  /**
+   * Merge complete list of divergent entries for an object
+   *
+   * @param new_divergent_prior [out] filled out for a new divergent prior
+   */
+  static void _merge_object_divergent_entries(
+    const IndexedLog &log,               ///< [in] log to merge against
+    const hobject_t &hoid,               ///< [in] object we are merging
+    const list<pg_log_entry_t> &entries, ///< [in] entries for hoid to merge
+    const pg_info_t &oinfo,              ///< [in] info for merging entries
+    eversion_t olog_can_rollback_to,     ///< [in] rollback boundary
+    pg_missing_t &omissing,              ///< [in,out] missing to adjust, use
     boost::optional<pair<eversion_t, hobject_t> > *new_divergent_prior,
-    LogEntryHandler *rollbacker) const;
-  bool merge_old_entry(
+    LogEntryHandler *rollbacker          ///< [in] optional rollbacker object
+    );
+
+  /// Merge all entries using above
+  static void _merge_divergent_entries(
+    const IndexedLog &log,               ///< [in] log to merge against
+    list<pg_log_entry_t> &entries,       ///< [in] entries to merge
+    const pg_info_t &oinfo,              ///< [in] info for merging entries
+    eversion_t olog_can_rollback_to,     ///< [in] rollback boundary
+    pg_missing_t &omissing,              ///< [in,out] missing to adjust, use
+    map<eversion_t, hobject_t> *priors,  ///< [out] target for new priors
+    LogEntryHandler *rollbacker          ///< [in] optional rollbacker object
+    ) {
+    map<hobject_t, list<pg_log_entry_t> > split;
+    split_by_object(entries, &split);
+    for (map<hobject_t, list<pg_log_entry_t> >::iterator i = split.begin();
+        i != split.end();
+        ++i) {
+      boost::optional<pair<eversion_t, hobject_t> > new_divergent_prior;
+      _merge_object_divergent_entries(
+       log,
+       i->first,
+       i->second,
+       oinfo,
+       olog_can_rollback_to,
+       omissing,
+       &new_divergent_prior,
+       rollbacker);
+      if (priors && new_divergent_prior) {
+       (*priors)[new_divergent_prior->first] = new_divergent_prior->second;
+      }
+    }
+  }
+
+  /**
+   * Exists for use in TestPGLog for simply testing single divergent log
+   * cases
+   */
+  void merge_old_entry(
     ObjectStore::Transaction& t,
     const pg_log_entry_t& oe,
     const pg_info_t& info,
     LogEntryHandler *rollbacker) {
     boost::optional<pair<eversion_t, hobject_t> > new_divergent_prior;
-    bool merged = _merge_old_entry(
-      t, oe, info, missing,
+    list<pg_log_entry_t> entries;
+    entries.push_back(oe);
+    _merge_object_divergent_entries(
+      log,
+      oe.soid,
+      entries,
+      info,
       log.can_rollback_to,
+      missing,
       &new_divergent_prior,
       rollbacker);
     if (new_divergent_prior)
       add_divergent_prior(
        (*new_divergent_prior).first,
        (*new_divergent_prior).second);
-    return merged;
   }
 public:
   void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead,