git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/: plumb rollback_trim_to through interfaces and messages
author Samuel Just <sam.just@inktank.com>
Fri, 30 May 2014 23:53:37 +0000 (16:53 -0700)
committer Samuel Just <sam.just@inktank.com>
Fri, 27 Jun 2014 20:25:52 +0000 (13:25 -0700)
We simply trim the rollback info up to min_last_complete_ondisk,
since we cannot correctly roll back past that point anyway.

ReplicatedPG
-> PGBackend::submit_transaction
-> Backend messages
-> PGBackend::Listener::log_operation
-> append_log

Signed-off-by: Samuel Just <sam.just@inktank.com>
13 files changed:
src/messages/MOSDSubOp.h
src/osd/ECBackend.cc
src/osd/ECBackend.h
src/osd/ECMsgTypes.cc
src/osd/ECMsgTypes.h
src/osd/PG.cc
src/osd/PG.h
src/osd/PGBackend.h
src/osd/PGLog.h
src/osd/ReplicatedBackend.cc
src/osd/ReplicatedBackend.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h

index 6a381861d613ee2dc2e0d521aa866743c29e7565..7b40c0a6823e42216b70cf398a5668f7809ca80d 100644 (file)
@@ -25,7 +25,7 @@
 
 class MOSDSubOp : public Message {
 
-  static const int HEAD_VERSION = 10;
+  static const int HEAD_VERSION = 11;
   static const int COMPAT_VERSION = 1;
 
 public:
@@ -63,6 +63,8 @@ public:
 
   // piggybacked osd/og state
   eversion_t pg_trim_to;   // primary->replica: trim to here
+  eversion_t pg_trim_rollback_to;   // primary->replica: trim rollback
+                                    // info to here
   osd_peer_stat_t peer_stat;
 
   map<string,bufferlist> attrset;
@@ -175,6 +177,11 @@ public:
     if (header.version >= 10) {
       ::decode(updated_hit_set_history, p);
     }
+    if (header.version >= 11) {
+      ::decode(pg_trim_rollback_to, p);
+    } else {
+      pg_trim_rollback_to = pg_trim_to;
+    }
   }
 
   virtual void encode_payload(uint64_t features) {
@@ -224,6 +231,7 @@ public:
     ::encode(from, payload);
     ::encode(pgid.shard, payload);
     ::encode(updated_hit_set_history, payload);
+    ::encode(pg_trim_rollback_to, payload);
   }
 
   MOSDSubOp()
index 3c27288c53259e758fcb733cdf717839960bca4e..7e454c84713a59cdd44a9ef6d337bfd25c0bff47 100644 (file)
@@ -829,6 +829,7 @@ void ECBackend::handle_sub_write(
     op.log_entries,
     op.updated_hit_set_history,
     op.trim_to,
+    op.trim_rollback_to,
     !(op.t.empty()),
     localt);
   localt->append(op.t);
@@ -1210,6 +1211,7 @@ void ECBackend::submit_transaction(
   const eversion_t &at_version,
   PGTransaction *_t,
   const eversion_t &trim_to,
+  const eversion_t &trim_rollback_to,
   vector<pg_log_entry_t> &log_entries,
   boost::optional<pg_hit_set_history_t> &hset_history,
   Context *on_local_applied_sync,
@@ -1225,6 +1227,7 @@ void ECBackend::submit_transaction(
   op->hoid = hoid;
   op->version = at_version;
   op->trim_to = trim_to;
+  op->trim_rollback_to = trim_rollback_to;
   op->log_entries.swap(log_entries);
   std::swap(op->updated_hit_set_history, hset_history);
   op->on_local_applied_sync = on_local_applied_sync;
@@ -1531,6 +1534,7 @@ void ECBackend::start_write(Op *op) {
       should_send ? iter->second : ObjectStore::Transaction(),
       op->version,
       op->trim_to,
+      op->trim_rollback_to,
       op->log_entries,
       op->updated_hit_set_history,
       op->temp_added,
index 2061ea87912b05fa98f28df3dc90267842894998..314e2b07f5886fcbcc1117217d66ad6704ee0d3d 100644 (file)
@@ -97,6 +97,7 @@ public:
     const eversion_t &at_version,
     PGTransaction *t,
     const eversion_t &trim_to,
+    const eversion_t &trim_rollback_to,
     vector<pg_log_entry_t> &log_entries,
     boost::optional<pg_hit_set_history_t> &hset_history,
     Context *on_local_applied_sync,
@@ -326,6 +327,7 @@ public:
     hobject_t hoid;
     eversion_t version;
     eversion_t trim_to;
+    eversion_t trim_rollback_to;
     vector<pg_log_entry_t> log_entries;
     boost::optional<pg_hit_set_history_t> updated_hit_set_history;
     Context *on_local_applied_sync;
index 4e4c8e3d94237207cff7c3d83aefaaf49ff2a5ff..47ef3533f33740c1e23ee42fe0f96e627f4206da 100644 (file)
@@ -16,7 +16,7 @@
 
 void ECSubWrite::encode(bufferlist &bl) const
 {
-  ENCODE_START(2, 1, bl);
+  ENCODE_START(3, 1, bl);
   ::encode(from, bl);
   ::encode(tid, bl);
   ::encode(reqid, bl);
@@ -29,12 +29,13 @@ void ECSubWrite::encode(bufferlist &bl) const
   ::encode(temp_added, bl);
   ::encode(temp_removed, bl);
   ::encode(updated_hit_set_history, bl);
+  ::encode(trim_rollback_to, bl);
   ENCODE_FINISH(bl);
 }
 
 void ECSubWrite::decode(bufferlist::iterator &bl)
 {
-  DECODE_START(2, bl);
+  DECODE_START(3, bl);
   ::decode(from, bl);
   ::decode(tid, bl);
   ::decode(reqid, bl);
@@ -49,6 +50,11 @@ void ECSubWrite::decode(bufferlist::iterator &bl)
   if (struct_v >= 2) {
     ::decode(updated_hit_set_history, bl);
   }
+  if (struct_v >= 3) {
+    ::decode(trim_rollback_to, bl);
+  } else {
+    trim_rollback_to = trim_to;
+  }
   DECODE_FINISH(bl);
 }
 
@@ -58,7 +64,8 @@ std::ostream &operator<<(
   lhs << "ECSubWrite(tid=" << rhs.tid
       << ", reqid=" << rhs.reqid
       << ", at_version=" << rhs.at_version
-      << ", trim_to=" << rhs.trim_to;
+      << ", trim_to=" << rhs.trim_to
+      << ", trim_rollback_to=" << rhs.trim_rollback_to;
   if (rhs.updated_hit_set_history)
     lhs << ", has_updated_hit_set_history";
   return lhs <<  ")";
@@ -70,6 +77,7 @@ void ECSubWrite::dump(Formatter *f) const
   f->dump_stream("reqid") << reqid;
   f->dump_stream("at_version") << at_version;
   f->dump_stream("trim_to") << trim_to;
+  f->dump_stream("trim_rollback_to") << trim_rollback_to;
   f->dump_stream("has_updated_hit_set_history")
     << static_cast<bool>(updated_hit_set_history);
 }
@@ -85,6 +93,12 @@ void ECSubWrite::generate_test_instances(list<ECSubWrite*> &o)
   o.back()->reqid = osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678);
   o.back()->at_version = eversion_t(10, 300);
   o.back()->trim_to = eversion_t(5, 42);
+  o.push_back(new ECSubWrite());
+  o.back()->tid = 9;
+  o.back()->reqid = osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678);
+  o.back()->at_version = eversion_t(10, 300);
+  o.back()->trim_to = eversion_t(5, 42);
+  o.back()->trim_rollback_to = eversion_t(8, 250);
 }
 
 void ECSubWriteReply::encode(bufferlist &bl) const
index 11c519d57bb4957a104a9fb77a8443ad537b3453..1cdfa57e153c50424ffbc094520b77c65dd42732 100644 (file)
@@ -28,6 +28,7 @@ struct ECSubWrite {
   ObjectStore::Transaction t;
   eversion_t at_version;
   eversion_t trim_to;
+  eversion_t trim_rollback_to;
   vector<pg_log_entry_t> log_entries;
   set<hobject_t> temp_added;
   set<hobject_t> temp_removed;
@@ -42,6 +43,7 @@ struct ECSubWrite {
     const ObjectStore::Transaction &t,
     eversion_t at_version,
     eversion_t trim_to,
+    eversion_t trim_rollback_to,
     vector<pg_log_entry_t> log_entries,
     boost::optional<pg_hit_set_history_t> updated_hit_set_history,
     const set<hobject_t> &temp_added,
@@ -49,7 +51,8 @@ struct ECSubWrite {
     : from(from), tid(tid), reqid(reqid),
       soid(soid), stats(stats), t(t),
       at_version(at_version),
-      trim_to(trim_to), log_entries(log_entries),
+      trim_to(trim_to), trim_rollback_to(trim_rollback_to),
+      log_entries(log_entries),
       temp_added(temp_added),
       temp_removed(temp_removed),
       updated_hit_set_history(updated_hit_set_history) {}
index d729de3759e5e3e8a54072e4219069a55f7bf42e..3be06f0010aaa171cf7e8518947f70d458f13e5e 100644 (file)
@@ -2640,7 +2640,10 @@ void PG::add_log_entry(pg_log_entry_t& e, bufferlist& log_bl)
 
 
 void PG::append_log(
-  vector<pg_log_entry_t>& logv, eversion_t trim_to, ObjectStore::Transaction &t,
+  vector<pg_log_entry_t>& logv,
+  eversion_t trim_to,
+  eversion_t trim_rollback_to,
+  ObjectStore::Transaction &t,
   bool transaction_applied)
 {
   if (transaction_applied)
@@ -2654,13 +2657,23 @@ void PG::append_log(
     p->offset = 0;
     add_log_entry(*p, keys[p->get_key_name()]);
   }
-  if (!transaction_applied)
-    pg_log.clear_can_rollback_to();
+
+  PGLogEntryHandler handler;
+  if (!transaction_applied) {
+    pg_log.clear_can_rollback_to(&handler);
+  } else if (trim_rollback_to > pg_log.get_rollback_trimmed_to()) {
+    pg_log.trim_rollback_info(
+      trim_rollback_to,
+      &handler);
+  }
 
   dout(10) << "append_log  adding " << keys.size() << " keys" << dendl;
   t.omap_setkeys(coll_t::META_COLL, log_oid, keys);
-  PGLogEntryHandler handler;
+
   pg_log.trim(&handler, trim_to, info);
+
+  dout(10) << __func__ << ": trimming to " << trim_rollback_to
+          << " entries " << handler.to_trim << dendl;
   handler.apply(this, &t);
 
   // update the local pg, pg log
index 8967a56fd2d7d9b8cea8b415700f8944c84834e4..7fd52fd2011211ceb4df0ea572172f3e87d2c547 100644 (file)
@@ -1996,7 +1996,10 @@ public:
 
   void add_log_entry(pg_log_entry_t& e, bufferlist& log_bl);
   void append_log(
-    vector<pg_log_entry_t>& logv, eversion_t trim_to, ObjectStore::Transaction &t,
+    vector<pg_log_entry_t>& logv,
+    eversion_t trim_to,
+    eversion_t trim_rollback_to,
+    ObjectStore::Transaction &t,
     bool transaction_applied = true);
   bool check_log_for_corruption(ObjectStore *store);
   void trim_peers();
index 1dbf20d2a75d62d9297d96dcd079d843dc64c3df..707252704a66552c1e3eeb0ce67880a7269738fb 100644 (file)
        vector<pg_log_entry_t> &logv,
        boost::optional<pg_hit_set_history_t> &hset_history,
        const eversion_t &trim_to,
+       const eversion_t &trim_rollback_to,
        bool transaction_applied,
        ObjectStore::Transaction *t) = 0;
 
      const eversion_t &at_version,        ///< [in] version
      PGTransaction *t,                    ///< [in] trans to execute
      const eversion_t &trim_to,           ///< [in] trim log to here
+     const eversion_t &trim_rollback_to,  ///< [in] trim rollback info to here
      vector<pg_log_entry_t> &log_entries, ///< [in] log entries for t
      /// [in] hitset history (if updated with this transaction)
      boost::optional<pg_hit_set_history_t> &hset_history,
index c46f63bfa39f653479f3c5c939667878a28723a0..8d0701f36537a75bd746bd422c3e8c3dcfdf24b8 100644 (file)
@@ -362,8 +362,21 @@ public:
     eversion_t trim_to,
     pg_info_t &info);
 
-  void clear_can_rollback_to() {
+  void trim_rollback_info(
+    eversion_t trim_rollback_to,
+    LogEntryHandler *h) {
+    if (trim_rollback_to > log.can_rollback_to)
+      log.can_rollback_to = trim_rollback_to;
+    log.advance_rollback_info_trimmed_to(
+      trim_rollback_to,
+      h);
+  }
+
+  void clear_can_rollback_to(LogEntryHandler *h) {
     log.can_rollback_to = log.head;
+    log.advance_rollback_info_trimmed_to(
+      log.head,
+      h);
   }
 
   //////////////////// get or set log & missing ////////////////////
index 5a9668fc2a161c52e9b546099a37eb0a220be162..1e1c6ff97a3a316eb2753cc1ff51e32d2bc9253f 100644 (file)
@@ -493,6 +493,7 @@ void ReplicatedBackend::submit_transaction(
   const eversion_t &at_version,
   PGTransaction *_t,
   const eversion_t &trim_to,
+  const eversion_t &trim_rollback_to,
   vector<pg_log_entry_t> &log_entries,
   boost::optional<pg_hit_set_history_t> &hset_history,
   Context *on_local_applied_sync,
@@ -533,6 +534,7 @@ void ReplicatedBackend::submit_transaction(
     tid,
     reqid,
     trim_to,
+    trim_rollback_to,
     t->get_temp_added().size() ? *(t->get_temp_added().begin()) : hobject_t(),
     t->get_temp_cleared().size() ?
       *(t->get_temp_cleared().begin()) :hobject_t(),
@@ -548,7 +550,13 @@ void ReplicatedBackend::submit_transaction(
   }
   clear_temp_objs(t->get_temp_cleared());
 
-  parent->log_operation(log_entries, hset_history, trim_to, true, &local_t);
+  parent->log_operation(
+    log_entries,
+    hset_history,
+    trim_to,
+    trim_rollback_to,
+    true,
+    &local_t);
   local_t.append(*op_t);
   local_t.swap(*op_t);
   
index 371574b35766b49568cebd51bc8dd0d3cae0584e..26f8f517171b7e17935c05b0916299b0d13282fc 100644 (file)
@@ -342,6 +342,7 @@ public:
     const eversion_t &at_version,
     PGTransaction *t,
     const eversion_t &trim_to,
+    const eversion_t &trim_rollback_to,
     vector<pg_log_entry_t> &log_entries,
     boost::optional<pg_hit_set_history_t> &hset_history,
     Context *on_local_applied_sync,
@@ -359,6 +360,7 @@ private:
     ceph_tid_t tid,
     osd_reqid_t reqid,
     eversion_t pg_trim_to,
+    eversion_t pg_trim_rollback_to,
     hobject_t new_temp_oid,
     hobject_t discard_temp_oid,
     vector<pg_log_entry_t> &log_entries,
index 95b61a3112246da131e2b8981090659cd230f069..57f7326a72295f83e5cdccfe03aaa505038db025 100644 (file)
@@ -6718,6 +6718,7 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now)
     repop->ctx->at_version,
     repop->ctx->op_t,
     pg_trim_to,
+    min_last_complete_ondisk,
     repop->ctx->log,
     repop->ctx->updated_hset_history,
     onapplied_sync,
@@ -6735,6 +6736,7 @@ void ReplicatedBackend::issue_op(
   ceph_tid_t tid,
   osd_reqid_t reqid,
   eversion_t pg_trim_to,
+  eversion_t pg_trim_rollback_to,
   hobject_t new_temp_oid,
   hobject_t discard_temp_oid,
   vector<pg_log_entry_t> &log_entries,
@@ -6790,6 +6792,7 @@ void ReplicatedBackend::issue_op(
       wr->pg_stats = get_info().stats;
     
     wr->pg_trim_to = pg_trim_to;
+    wr->pg_trim_rollback_to = pg_trim_rollback_to;
 
     wr->new_temp_oid = new_temp_oid;
     wr->discard_temp_oid = discard_temp_oid;
@@ -7590,6 +7593,7 @@ void ReplicatedBackend::sub_op_modify(OpRequestRef op)
       log,
       m->updated_hit_set_history,
       m->pg_trim_to,
+      m->pg_trim_rollback_to,
       update_snaps,
       &(rm->localt));
       
index abee8bf57b67acddb30b8a0561e2eca627d8c77a..5df59f8b0497e084c5fe1af5f97fa81a79a61d96 100644 (file)
@@ -347,13 +347,14 @@ public:
     vector<pg_log_entry_t> &logv,
     boost::optional<pg_hit_set_history_t> &hset_history,
     const eversion_t &trim_to,
+    const eversion_t &trim_rollback_to,
     bool transaction_applied,
     ObjectStore::Transaction *t) {
     if (hset_history) {
       info.hit_set = *hset_history;
       dirty_info = true;
     }
-    append_log(logv, trim_to, *t, transaction_applied);
+    append_log(logv, trim_to, trim_rollback_to, *t, transaction_applied);
   }
 
   void op_applied(