git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PGLog: maintain writeout_from and trimmed
author Samuel Just <sam.just@inktank.com>
Tue, 27 Aug 2013 15:49:14 +0000 (08:49 -0700)
committer Samuel Just <sam.just@inktank.com>
Fri, 6 Sep 2013 23:06:12 +0000 (16:06 -0700)
This way, we can avoid omap_rmkeyrange in the common append
and trim cases.

Fixes: #6040
Backport: Dumpling
Signed-off-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit f808c205c503f7d32518c91619f249466f84c4cf)

src/osd/PGLog.cc
src/osd/PGLog.h

index 0e18710ad18ee9631b8612ab2243e2eb4ae040be..a6e538c75e75eac51b9a1b993b2be69bda60fba4 100644 (file)
@@ -64,7 +64,7 @@ void PGLog::IndexedLog::split_into(
   index();
 }
 
-void PGLog::IndexedLog::trim(eversion_t s)
+void PGLog::IndexedLog::trim(eversion_t s, set<eversion_t> *trimmed)
 {
   if (complete_to != log.end() &&
       complete_to->version <= s) {
@@ -77,6 +77,8 @@ void PGLog::IndexedLog::trim(eversion_t s)
     if (e.version > s)
       break;
     generic_dout(20) << "trim " << e << dendl;
+    if (trimmed)
+      trimmed->insert(e.version);
     unindex(e);         // remove from index,
     log.pop_front();    // from log
   }
@@ -142,14 +144,8 @@ void PGLog::trim(eversion_t trim_to, pg_info_t &info)
     assert(trim_to <= info.last_complete);
 
     dout(10) << "trim " << log << " to " << trim_to << dendl;
-    log.trim(trim_to);
+    log.trim(trim_to, &trimmed);
     info.log_tail = log.tail;
-
-    if (log.log.empty()) {
-      mark_dirty_to(eversion_t::max());
-    } else {
-      mark_dirty_to(log.log.front().version);
-    }
   }
 }
 
@@ -541,11 +537,15 @@ void PGLog::write_log(
             << "dirty_to: " << dirty_to
             << ", dirty_from: " << dirty_from
             << ", dirty_divergent_priors: " << dirty_divergent_priors
+            << ", writeout_from: " << writeout_from
+            << ", trimmed: " << trimmed
             << dendl;
     _write_log(
       t, log, log_oid, divergent_priors,
       dirty_to,
       dirty_from,
+      writeout_from,
+      trimmed,
       dirty_divergent_priors,
       !touched_log,
       (pg_log_debug ? &log_keys_debug : 0));
@@ -558,8 +558,11 @@ void PGLog::write_log(
 void PGLog::write_log(ObjectStore::Transaction& t, pg_log_t &log,
     const hobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors)
 {
-  _write_log(t, log, log_oid, divergent_priors, eversion_t::max(), eversion_t(),
-            true, true, 0);
+  _write_log(
+    t, log, log_oid,
+    divergent_priors, eversion_t::max(), eversion_t(), eversion_t(),
+    set<eversion_t>(),
+    true, true, 0);
 }
 
 void PGLog::_write_log(
@@ -567,11 +570,24 @@ void PGLog::_write_log(
   const hobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors,
   eversion_t dirty_to,
   eversion_t dirty_from,
+  eversion_t writeout_from,
+  const set<eversion_t> &trimmed,
   bool dirty_divergent_priors,
   bool touch_log,
   set<string> *log_keys_debug
   )
 {
+  set<string> to_remove;
+  for (set<eversion_t>::const_iterator i = trimmed.begin();
+       i != trimmed.end();
+       ++i) {
+    to_remove.insert(i->get_key_name());
+    if (log_keys_debug) {
+      assert(log_keys_debug->count(i->get_key_name()));
+      log_keys_debug->erase(i->get_key_name());
+    }
+  }
+
 //dout(10) << "write_log, clearing up to " << dirty_to << dendl;
   if (touch_log)
     t.touch(coll_t(), log_oid);
@@ -599,7 +615,8 @@ void PGLog::_write_log(
   }
 
   for (list<pg_log_entry_t>::reverse_iterator p = log.log.rbegin();
-       p != log.log.rend() && p->version >= dirty_from &&
+       p != log.log.rend() &&
+        (p->version >= dirty_from || p->version >= writeout_from) &&
         p->version >= dirty_to;
        ++p) {
     bufferlist bl(sizeof(*p) * 2);
@@ -621,6 +638,7 @@ void PGLog::_write_log(
     ::encode(divergent_priors, keys["divergent_priors"]);
   }
 
+  t.omap_rmkeys(coll_t::META_COLL, log_oid, to_remove);
   t.omap_setkeys(coll_t::META_COLL, log_oid, keys);
 }
 
index b524c60c00a4799f36ec96e95d254e3e3a8f9aac..871aa1ab7dd8b62578a9e93f3e4cc184cbca01e5 100644 (file)
@@ -142,7 +142,7 @@ struct PGLog {
        caller_ops[e.reqid] = &(log.back());
     }
 
-    void trim(eversion_t s);
+    void trim(eversion_t s, set<eversion_t> *trimmed);
 
     ostream& print(ostream& out) const;
   };
@@ -158,8 +158,10 @@ protected:
 
   /// Log is clean on [dirty_to, dirty_from)
   bool touched_log;
-  eversion_t dirty_to;
-  eversion_t dirty_from;
+  eversion_t dirty_to;         ///< must clear/writeout all keys up to dirty_to
+  eversion_t dirty_from;       ///< must clear/writeout all keys past dirty_from
+  eversion_t writeout_from;    ///< must writeout keys past writeout_from
+  set<eversion_t> trimmed;     ///< must clear keys in trimmed
   bool dirty_divergent_priors;
   CephContext *cct;
 
@@ -167,7 +169,9 @@ protected:
     return !touched_log ||
       (dirty_to != eversion_t()) ||
       (dirty_from != eversion_t::max()) ||
-      dirty_divergent_priors;
+      dirty_divergent_priors ||
+      (writeout_from != eversion_t::max()) ||
+      !(trimmed.empty());
   }
   void mark_dirty_to(eversion_t to) {
     if (to > dirty_to)
@@ -177,6 +181,10 @@ protected:
     if (from < dirty_from)
       dirty_from = from;
   }
+  void mark_writeout_from(eversion_t from) {
+    if (from < writeout_from)
+      writeout_from = from;
+  }
   void add_divergent_prior(eversion_t version, hobject_t obj) {
     divergent_priors.insert(make_pair(version, obj));
     dirty_divergent_priors = true;
@@ -221,6 +229,8 @@ protected:
     dirty_from = eversion_t::max();
     dirty_divergent_priors = false;
     touched_log = true;
+    trimmed.clear();
+    writeout_from = eversion_t::max();
     check();
   }
 public:
@@ -281,7 +291,7 @@ public:
   void unindex() { log.unindex(); }
 
   void add(pg_log_entry_t& e) {
-    mark_dirty_from(e.version);
+    mark_writeout_from(e.version);
     log.add(e);
   }
 
@@ -374,6 +384,8 @@ public:
     const hobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors,
     eversion_t dirty_to,
     eversion_t dirty_from,
+    eversion_t writeout_from,
+    const set<eversion_t> &trimmed,
     bool dirty_divergent_priors,
     bool touch_log,
     set<string> *log_keys_debug