git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/: refactor PGLog a bit and add support for rolling back extents
author Samuel Just <sjust@redhat.com>
Tue, 15 Nov 2016 23:47:37 +0000 (15:47 -0800)
committer Samuel Just <sjust@redhat.com>
Thu, 17 Nov 2016 18:40:19 +0000 (10:40 -0800)
It was hard to reason about the validity of the IndexedLog internal
pointers and iterators during updates, so this patch cleans that up
substantially.  It also moves responsibility for doing rollbacks into
PGBackend.  Finally, it adds support for the new log entry format.

Signed-off-by: Samuel Just <sjust@redhat.com>
12 files changed:
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h
src/osd/PGBackend.cc
src/osd/PGBackend.h
src/osd/PGLog.cc
src/osd/PGLog.h
src/osd/ReplicatedBackend.cc
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h
src/osd/osd_types.h
src/test/osd/TestPGLog.cc

index 4d891a493d631a7bc63ee30531e5d58ed029c080..91766776eb16a0630f5372404b80ada3987b05d1 100644 (file)
@@ -8063,9 +8063,7 @@ void OSD::handle_pg_trim(OpRequestRef op)
   } else {
     // primary is instructing us to trim
     ObjectStore::Transaction t;
-    PG::PGLogEntryHandler handler;
-    pg->pg_log.trim(&handler, m->trim_to, pg->info);
-    handler.apply(pg, &t);
+    pg->pg_log.trim(m->trim_to, pg->info);
     pg->dirty_info = true;
     pg->write_if_dirty(t);
     int tr = store->queue_transaction(pg->osr.get(), std::move(t), NULL);
index fc466a408243c1f43250d9c05cab21c52b78708c..73c71bbdc9be4c4d3c82c408cfeea402efd9e033 100644 (file)
@@ -435,18 +435,16 @@ void PG::update_object_snap_mapping(
 void PG::merge_log(
   ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, pg_shard_t from)
 {
-  PGLogEntryHandler rollbacker;
+  PGLogEntryHandler rollbacker{this, &t};
   pg_log.merge_log(
     t, oinfo, olog, from, info, &rollbacker, dirty_info, dirty_big_info);
-  rollbacker.apply(this, &t);
 }
 
 void PG::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead)
 {
-  PGLogEntryHandler rollbacker;
+  PGLogEntryHandler rollbacker{this, &t};
   pg_log.rewind_divergent_log(
     t, newhead, info, &rollbacker, dirty_info, dirty_big_info);
-  rollbacker.apply(this, &t);
 }
 
 /*
@@ -1590,7 +1588,7 @@ void PG::activate(ObjectStore::Transaction& t,
     min_last_complete_ondisk = eversion_t(0,0);  // we don't know (yet)!
   }
   last_update_applied = info.last_update;
-  last_rollback_info_trimmed_to_applied = pg_log.get_rollback_trimmed_to();
+  last_rollback_info_trimmed_to_applied = pg_log.get_can_rollback_to();
 
   need_up_thru = false;
 
@@ -1848,10 +1846,12 @@ void PG::activate(ObjectStore::Transaction& t,
 
     state_set(PG_STATE_ACTIVATING);
   }
+  if (is_primary()) {
+    projected_last_update = info.last_update;
+  }
   if (acting.size() >= pool.info.min_size) {
-    PGLogEntryHandler handler;
+    PGLogEntryHandler handler{this, &t};
     pg_log.roll_forward(&handler);
-    handler.apply(this, &t);
   }
 }
 
@@ -3053,21 +3053,29 @@ void PG::append_log(
   }
   dout(10) << "append_log " << pg_log.get_log() << " " << logv << dendl;
 
+  PGLogEntryHandler handler{this, &t};
+  if (!transaction_applied) {
+     /* We must be a backfill peer, so it's ok if we apply
+      * out-of-turn since we won't be considered when
+      * determining a min possible last_update.
+      */
+    pg_log.roll_forward(&handler);
+  }
+
   for (vector<pg_log_entry_t>::const_iterator p = logv.begin();
        p != logv.end();
        ++p) {
-    add_log_entry(*p);
-  }
+    add_log_entry(*p, transaction_applied);
 
-  PGLogEntryHandler handler;
-  if (!transaction_applied) {
-    pg_log.roll_forward(&handler);
-    t.register_on_applied(
-      new C_UpdateLastRollbackInfoTrimmedToApplied(
-       this,
-       get_osdmap()->get_epoch(),
-       info.last_update));
-  } else if (roll_forward_to > pg_log.get_rollback_trimmed_to()) {
+    /* We don't want to leave the rollforward artifacts around
+     * here past last_backfill.  It's ok for the same reason as
+     * above */
+    if (transaction_applied &&
+       (cmp(p->soid, info.last_backfill, get_sort_bitwise()) > 0)) {
+      pg_log.roll_forward(&handler);
+    }
+  }
+  if (transaction_applied && roll_forward_to > pg_log.get_can_rollback_to()) {
     pg_log.roll_forward_to(
       roll_forward_to,
       &handler);
@@ -3078,11 +3086,7 @@ void PG::append_log(
        roll_forward_to));
   }
 
-  pg_log.trim(&handler, trim_to, info);
-
-  dout(10) << __func__ << ": rolling forward to " << roll_forward_to
-          << " entries " << handler.to_trim << dendl;
-  handler.apply(this, &t);
+  pg_log.trim(trim_to, info);
 
   // update the local pg, pg log
   dirty_info = true;
@@ -4653,13 +4657,12 @@ bool PG::append_log_entries_update_missing(
   assert(!entries.empty());
   assert(entries.begin()->version > info.last_update);
 
-  PGLogEntryHandler rollbacker;
+  PGLogEntryHandler rollbacker{this, &t};
   bool invalidate_stats =
     pg_log.append_new_log_entries(info.last_backfill,
                                  info.last_backfill_bitwise,
                                  entries,
                                  &rollbacker);
-  rollbacker.apply(this, &t);
   info.last_update = pg_log.get_head();
 
   if (pg_log.get_missing().num_missing() == 0) {
@@ -4695,6 +4698,7 @@ void PG::merge_new_log_entries(
       pinfo.last_backfill,
       info.last_backfill_bitwise,
       entries,
+      true,
       NULL,
       pmissing,
       NULL,
@@ -5286,7 +5290,7 @@ ostream& operator<<(ostream& out, const PG& pg)
 
   if (!pg.backfill_targets.empty())
     out << " bft=" << pg.backfill_targets;
-  out << " crt=" << pg.pg_log.get_log().can_rollback_to;
+  out << " crt=" << pg.pg_log.get_can_rollback_to();
 
   if (pg.last_complete_ondisk != pg.info.last_complete)
     out << " lcod " << pg.last_complete_ondisk;
@@ -7126,9 +7130,8 @@ boost::statechart::result PG::RecoveryState::Stray::react(const MLogRec& logevt)
     pg->dirty_info = true;
     pg->dirty_big_info = true;  // maybe.
 
-    PGLogEntryHandler rollbacker;
+    PGLogEntryHandler rollbacker{pg, t};
     pg->pg_log.reset_backfill_claim_log(msg->log, &rollbacker);
-    rollbacker.apply(pg, t);
 
     pg->pg_log.reset_backfill();
   } else {
index 891fe991c6420f606401651d935aac80d823539d..ce79c69660fde77e288b4d127421d39501b55f6b 100644 (file)
@@ -941,92 +941,30 @@ public:
   bool proc_replica_info(
     pg_shard_t from, const pg_info_t &info, epoch_t send_epoch);
 
-
-  struct LogEntryTrimmer : public ObjectModDesc::Visitor {
-    const hobject_t &soid;
-    PG *pg;
-    ObjectStore::Transaction *t;
-    LogEntryTrimmer(const hobject_t &soid, PG *pg, ObjectStore::Transaction *t)
-      : soid(soid), pg(pg), t(t) {}
-    void rmobject(version_t old_version) {
-      pg->get_pgbackend()->trim_stashed_object(
-       soid,
-       old_version,
-       t);
-    }
-  };
-
-  struct SnapRollBacker : public ObjectModDesc::Visitor {
-    const hobject_t &soid;
+  struct PGLogEntryHandler : public PGLog::LogEntryHandler {
     PG *pg;
     ObjectStore::Transaction *t;
-    SnapRollBacker(const hobject_t &soid, PG *pg, ObjectStore::Transaction *t)
-      : soid(soid), pg(pg), t(t) {}
-    void update_snaps(set<snapid_t> &snaps) {
-      pg->update_object_snap_mapping(t, soid, snaps);
-    }
-    void create() {
-      pg->clear_object_snap_mapping(
-       t,
-       soid);
-    }
-  };
+    PGLogEntryHandler(PG *pg, ObjectStore::Transaction *t) : pg(pg), t(t) {}
 
-  struct PGLogEntryHandler : public PGLog::LogEntryHandler {
-    mempool::osd::list<pg_log_entry_t> to_rollback;
-    set<hobject_t, hobject_t::BitwiseComparator> to_remove;
-    mempool::osd::list<pg_log_entry_t> to_trim;
-    list<pair<hobject_t, version_t> > to_stash;
-    
     // LogEntryHandler
     void remove(const hobject_t &hoid) {
-      to_remove.insert(hoid);
+      pg->get_pgbackend()->remove(hoid, t);
     }
     void try_stash(const hobject_t &hoid, version_t v) {
-      to_stash.push_back(make_pair(hoid, v));
+      pg->get_pgbackend()->try_stash(hoid, v, t);
     }
     void rollback(const pg_log_entry_t &entry) {
-      to_rollback.push_back(entry);
+      assert(entry.can_rollback());
+      pg->get_pgbackend()->rollback(entry, t);
     }
     void rollforward(const pg_log_entry_t &entry) {
-      to_trim.push_back(entry);
+      pg->get_pgbackend()->rollforward(entry, t);
     }
     void trim(const pg_log_entry_t &entry) {
-      to_trim.push_back(entry);
-    }
-
-    void apply(PG *pg, ObjectStore::Transaction *t) {
-      for (list<pg_log_entry_t>::iterator j = to_rollback.begin();
-          j != to_rollback.end();
-          ++j) {
-       assert(j->mod_desc.can_rollback());
-       pg->get_pgbackend()->rollback(j->soid, j->mod_desc, t);
-       SnapRollBacker rollbacker(j->soid, pg, t);
-       j->mod_desc.visit(&rollbacker);
-      }
-      for (list<pair<hobject_t, version_t> >::iterator i = to_stash.begin();
-          i != to_stash.end();
-          ++i) {
-       pg->get_pgbackend()->try_stash(i->first, i->second, t);
-      }
-      for (set<hobject_t, hobject_t::BitwiseComparator>::iterator i = to_remove.begin();
-          i != to_remove.end();
-          ++i) {
-       pg->get_pgbackend()->rollback_create(*i, t);
-       pg->remove_snap_mapped_object(*t, *i);
-      }
-      for (list<pg_log_entry_t>::reverse_iterator i = to_trim.rbegin();
-          i != to_trim.rend();
-          ++i) {
-       LogEntryTrimmer trimmer(i->soid, pg, t);
-       i->mod_desc.visit(&trimmer);
-      }
+      pg->get_pgbackend()->trim(entry, t);
     }
   };
   
-  friend struct SnapRollBacker;
-  friend struct PGLogEntryHandler;
-  friend struct LogEntryTrimmer;
   void update_object_snap_mapping(
     ObjectStore::Transaction *t, const hobject_t &soid,
     const set<snapid_t> &snaps);
index c3153d01a1a67bffd2e5c2fabf9feceee1f6f116..65b56c5a0105842d13a236c34d834a4eb979f1bd 100644 (file)
@@ -36,60 +36,117 @@ static ostream& _prefix(std::ostream *_dout, PGBackend *pgb) {
   return *_dout << pgb->get_parent()->gen_dbg_prefix();
 }
 
-// -- ObjectModDesc --
-struct RollbackVisitor : public ObjectModDesc::Visitor {
-  const hobject_t &hoid;
+void PGBackend::rollback(
+  const pg_log_entry_t &entry,
+  ObjectStore::Transaction *t)
+{
+
+  struct RollbackVisitor : public ObjectModDesc::Visitor {
+    const hobject_t &hoid;
+    PGBackend *pg;
+    ObjectStore::Transaction t;
+    RollbackVisitor(
+      const hobject_t &hoid,
+      PGBackend *pg) : hoid(hoid), pg(pg) {}
+    void append(uint64_t old_size) override {
+      ObjectStore::Transaction temp;
+      pg->rollback_append(hoid, old_size, &temp);
+      temp.append(t);
+      temp.swap(t);
+    }
+    void setattrs(map<string, boost::optional<bufferlist> > &attrs) override {
+      ObjectStore::Transaction temp;
+      pg->rollback_setattrs(hoid, attrs, &temp);
+      temp.append(t);
+      temp.swap(t);
+    }
+    void rmobject(version_t old_version) override {
+      ObjectStore::Transaction temp;
+      pg->rollback_stash(hoid, old_version, &temp);
+      temp.append(t);
+      temp.swap(t);
+    }
+    void try_rmobject(version_t old_version) override {
+      ObjectStore::Transaction temp;
+      pg->rollback_try_stash(hoid, old_version, &temp);
+      temp.append(t);
+      temp.swap(t);
+    }
+    void create() override {
+      ObjectStore::Transaction temp;
+      pg->rollback_create(hoid, &temp);
+      temp.append(t);
+      temp.swap(t);
+    }
+    void update_snaps(const set<snapid_t> &snaps) override {
+      ObjectStore::Transaction temp;
+      pg->get_parent()->pgb_set_object_snap_mapping(hoid, snaps, &temp);
+      temp.append(t);
+      temp.swap(t);
+    }
+    void rollback_extents(
+      version_t gen,
+      const vector<pair<uint64_t, uint64_t> > &extents) override {
+      ObjectStore::Transaction temp;
+      pg->rollback_extents(gen, extents, hoid, &temp);
+      temp.append(t);
+      temp.swap(t);
+    }
+  };
+
+  assert(entry.mod_desc.can_rollback());
+  RollbackVisitor vis(entry.soid, this);
+  entry.mod_desc.visit(&vis);
+  t->append(vis.t);
+}
+
+struct Trimmer : public ObjectModDesc::Visitor {
+  const hobject_t &soid;
   PGBackend *pg;
-  ObjectStore::Transaction t;
-  RollbackVisitor(
-    const hobject_t &hoid,
-    PGBackend *pg) : hoid(hoid), pg(pg) {}
-  void append(uint64_t old_size) {
-    ObjectStore::Transaction temp;
-    pg->rollback_append(hoid, old_size, &temp);
-    temp.append(t);
-    temp.swap(t);
-  }
-  void setattrs(map<string, boost::optional<bufferlist> > &attrs) {
-    ObjectStore::Transaction temp;
-    pg->rollback_setattrs(hoid, attrs, &temp);
-    temp.append(t);
-    temp.swap(t);
-  }
+  ObjectStore::Transaction *t;
+  Trimmer(
+    const hobject_t &soid,
+    PGBackend *pg,
+    ObjectStore::Transaction *t)
+    : soid(soid), pg(pg), t(t) {}
   void rmobject(version_t old_version) {
-    ObjectStore::Transaction temp;
-    pg->rollback_stash(hoid, old_version, &temp);
-    temp.append(t);
-    temp.swap(t);
-  }
-  void try_rmobject(version_t old_version) {
-    ObjectStore::Transaction temp;
-    pg->rollback_try_stash(hoid, old_version, &temp);
-    temp.append(t);
-    temp.swap(t);
-  }
-  void create() {
-    ObjectStore::Transaction temp;
-    pg->rollback_create(hoid, &temp);
-    temp.append(t);
-    temp.swap(t);
+    pg->trim_rollback_object(
+      soid,
+      old_version,
+      t);
   }
-  void update_snaps(set<snapid_t> &snaps) {
-    // pass
+  // try_rmobject defaults to rmobject
+  void rollback_extents(
+    version_t gen,
+    const vector<pair<uint64_t, uint64_t> > &extents) override {
+    pg->trim_rollback_object(
+      soid,
+      gen,
+      t);
   }
 };
 
-void PGBackend::rollback(
-  const hobject_t &hoid,
-  const ObjectModDesc &desc,
+void PGBackend::rollforward(
+  const pg_log_entry_t &entry,
   ObjectStore::Transaction *t)
 {
-  assert(desc.can_rollback());
-  RollbackVisitor vis(hoid, this);
-  desc.visit(&vis);
-  t->append(vis.t);
+  auto dpp = get_parent()->get_dpp();
+  ldpp_dout(dpp, 20) << __func__ << ": entry=" << entry << dendl;
+  if (!entry.can_rollback())
+    return;
+  Trimmer trimmer(entry.soid, this, t);
+  entry.mod_desc.visit(&trimmer);
 }
 
+void PGBackend::trim(
+  const pg_log_entry_t &entry,
+  ObjectStore::Transaction *t)
+{
+  if (!entry.can_rollback())
+    return;
+  Trimmer trimmer(entry.soid, this, t);
+  entry.mod_desc.visit(&trimmer);
+}
 
 void PGBackend::try_stash(
   const hobject_t &hoid,
@@ -102,6 +159,16 @@ void PGBackend::try_stash(
     ghobject_t(hoid, v, get_parent()->whoami_shard().shard));
 }
 
+void PGBackend::remove(
+  const hobject_t &hoid,
+  ObjectStore::Transaction *t) {
+  assert(!hoid.is_temp());
+  t->remove(
+    coll,
+    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
+  get_parent()->pgb_clear_object_snap_mapping(hoid, t);
+}
+
 void PGBackend::on_change_cleanup(ObjectStore::Transaction *t)
 {
   dout(10) << __func__ << dendl;
@@ -293,16 +360,27 @@ void PGBackend::rollback_try_stash(
     ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
 }
 
-void PGBackend::rollback_create(
+void PGBackend::rollback_extents(
+  version_t gen,
+  const vector<pair<uint64_t, uint64_t> > &extents,
   const hobject_t &hoid,
   ObjectStore::Transaction *t) {
-  assert(!hoid.is_temp());
+  auto shard = get_parent()->whoami_shard().shard;
+  for (auto &&extent: extents) {
+    t->clone_range(
+      coll,
+      ghobject_t(hoid, gen, shard),
+      ghobject_t(hoid, ghobject_t::NO_GEN, shard),
+      extent.first,
+      extent.second,
+      extent.first);
+  }
   t->remove(
     coll,
-    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
+    ghobject_t(hoid, gen, shard));
 }
 
-void PGBackend::trim_stashed_object(
+void PGBackend::trim_rollback_object(
   const hobject_t &hoid,
   version_t old_version,
   ObjectStore::Transaction *t) {
index 7f7a4481a2c3d5e3146276180f4990ebd3702960..c92d44fb8558d64b7bdd9e594fa61577818525d7 100644 (file)
@@ -196,6 +196,15 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
        bool transaction_applied,
        ObjectStore::Transaction &t) = 0;
 
+     virtual void pgb_set_object_snap_mapping(
+       const hobject_t &soid,
+       const set<snapid_t> &snaps,
+       ObjectStore::Transaction *t) = 0;
+
+     virtual void pgb_clear_object_snap_mapping(
+       const hobject_t &soid,
+       ObjectStore::Transaction *t) = 0;
+
      virtual void update_peer_last_complete_ondisk(
        pg_shard_t fromosd,
        eversion_t lcod) = 0;
@@ -393,10 +402,23 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
      ObjectStore::Transaction *t);
 
    void rollback(
+     const pg_log_entry_t &entry,
+     ObjectStore::Transaction *t);
+
+   friend class LRBTrimmer;
+   void rollforward(
+     const pg_log_entry_t &entry,
+     ObjectStore::Transaction *t);
+
+   void trim(
+     const pg_log_entry_t &entry,
+     ObjectStore::Transaction *t);
+
+   void remove(
      const hobject_t &hoid,
-     const ObjectModDesc &desc,
      ObjectStore::Transaction *t);
 
+ protected:
    /// Reapply old attributes
    void rollback_setattrs(
      const hobject_t &hoid,
@@ -423,13 +445,23 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
 
    /// Delete object to rollback create
    void rollback_create(
+     const hobject_t &hoid,
+     ObjectStore::Transaction *t) {
+     remove(hoid, t);
+   }
+
+   /// Clone the extents back into place
+   void rollback_extents(
+     version_t gen,
+     const vector<pair<uint64_t, uint64_t> > &extents,
      const hobject_t &hoid,
      ObjectStore::Transaction *t);
+ public:
 
-   /// Trim object stashed at stashed_version
-   void trim_stashed_object(
+   /// Trim object stashed at version
+   void trim_rollback_object(
      const hobject_t &hoid,
-     version_t stashed_version,
+     version_t gen,
      ObjectStore::Transaction *t);
 
    /// List objects in collection
index b0fc2e25e2bae99d98383f7dcd72b6a72c2bec35..ea2a85000d5782b3757952cdcf695565896f0762 100644 (file)
@@ -30,49 +30,17 @@ static ostream& _prefix(std::ostream *_dout, const PGLog *pglog)
 
 //////////////////// PGLog::IndexedLog ////////////////////
 
-void PGLog::IndexedLog::filter_log(spg_t pgid, const OSDMap &map, const string &hit_set_namespace)
-{
-  IndexedLog out;
-  pg_log_t reject;
-
-  pg_log_t::filter_log(pgid, map, hit_set_namespace, *this, out, reject);
-
-  *this = out;
-  index();
-}
-
-void PGLog::IndexedLog::split_into(
+PGLog::IndexedLog PGLog::IndexedLog::split_out_child(
   pg_t child_pgid,
-  unsigned split_bits,
-  PGLog::IndexedLog *olog)
+  unsigned split_bits)
 {
-  mempool::osd::list<pg_log_entry_t> oldlog;
-  oldlog.swap(log);
-
-  eversion_t old_tail;
-  olog->head = head;
-  olog->tail = tail;
-  unsigned mask = ~((~0)<<split_bits);
-  for (list<pg_log_entry_t>::iterator i = oldlog.begin();
-       i != oldlog.end();
-       ) {
-    if ((i->soid.get_hash() & mask) == child_pgid.m_seed) {
-      olog->log.push_back(*i);
-    } else {
-      log.push_back(*i);
-    }
-    oldlog.erase(i++);
-  }
-
-
-  olog->can_rollback_to = can_rollback_to;
-
-  olog->index();
+  IndexedLog ret(pg_log_t::split_out_child(child_pgid, split_bits));
   index();
+  reset_rollback_info_trimmed_to_riter();
+  return ret;
 }
 
 void PGLog::IndexedLog::trim(
-  LogEntryHandler *handler,
   eversion_t s,
   set<eversion_t> *trimmed)
 {
@@ -83,9 +51,7 @@ void PGLog::IndexedLog::trim(
                    << " on " << *this << dendl;
   }
 
-  if (s > can_rollback_to)
-    can_rollback_to = s;
-  trim_rollback_info_to(s, handler);
+  assert(s <= can_rollback_to);
 
   while (!log.empty()) {
     pg_log_entry_t &e = *log.begin();
@@ -145,7 +111,6 @@ void PGLog::clear_info_log(
 }
 
 void PGLog::trim(
-  LogEntryHandler *handler,
   eversion_t trim_to,
   pg_info_t &info)
 {
@@ -155,7 +120,7 @@ void PGLog::trim(
     assert(trim_to <= info.last_complete);
 
     dout(10) << "trim " << log << " to " << trim_to << dendl;
-    log.trim(handler, trim_to, &trimmed);
+    log.trim(trim_to, &trimmed);
     info.log_tail = log.tail;
   }
 }
@@ -223,38 +188,13 @@ void PGLog::proc_replica_log(
     log.tail :
     first_non_divergent->version;
 
-  mempool::osd::list<pg_log_entry_t> divergent;
-  list<pg_log_entry_t>::const_iterator pp = olog.log.end();
-  while (true) {
-    if (pp == olog.log.begin())
-      break;
-
-    --pp;
-    const pg_log_entry_t& oe = *pp;
-
-    // don't continue past the tail of our log.
-    if (oe.version <= log.tail) {
-      ++pp;
-      break;
-    }
-
-    if (oe.version <= lu) {
-      ++pp;
-      break;
-    }
-
-    divergent.push_front(oe);
-  }
-
-
-  IndexedLog folog;
-  folog.log.insert(folog.log.begin(), olog.log.begin(), pp);
-  folog.index();
+  IndexedLog folog(olog);
+  auto divergent = folog.rewind_from_head(lu);
   _merge_divergent_entries(
     folog,
     divergent,
     oinfo,
-    olog.can_rollback_to,
+    olog.get_can_rollback_to(),
     omissing,
     0,
     this);
@@ -296,49 +236,29 @@ void PGLog::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead
                                 bool &dirty_info, bool &dirty_big_info)
 {
   dout(10) << "rewind_divergent_log truncate divergent future " << newhead << dendl;
-  assert(newhead >= log.tail);
-
-  list<pg_log_entry_t>::iterator p = log.log.end();
-  mempool::osd::list<pg_log_entry_t> divergent;
-  while (true) {
-    if (p == log.log.begin()) {
-      // yikes, the whole thing is divergent!
-      divergent.swap(log.log);
-      break;
-    }
-    --p;
-    mark_dirty_from(p->version);
-    if (p->version <= newhead) {
-      ++p;
-      divergent.splice(divergent.begin(), log.log, p, log.log.end());
-      break;
-    }
-    assert(p->version > newhead);
-    dout(10) << "rewind_divergent_log future divergent " << *p << dendl;
-  }
 
-  log.head = newhead;
-  info.last_update = newhead;
+
   if (info.last_complete > newhead)
     info.last_complete = newhead;
 
-  if (log.rollback_info_trimmed_to > newhead)
-    log.rollback_info_trimmed_to = newhead;
-
-  log.index();
+  auto divergent = log.rewind_from_head(newhead);
+  if (!divergent.empty()) {
+    mark_dirty_from(divergent.front().version);
+  }
+  for (auto &&entry: divergent) {
+    dout(10) << "rewind_divergent_log future divergent " << entry << dendl;
+  }
+  info.last_update = newhead;
 
   _merge_divergent_entries(
     log,
     divergent,
     info,
-    log.can_rollback_to,
+    log.get_can_rollback_to(),
     missing,
     rollbacker,
     this);
 
-  if (info.last_update < log.can_rollback_to)
-    log.can_rollback_to = info.last_update;
-
   dirty_info = true;
   dirty_big_info = true;
 }
@@ -431,53 +351,41 @@ void PGLog::merge_log(ObjectStore::Transaction& t,
     }
     mark_dirty_from(lower_bound);
 
+    auto divergent = log.rewind_from_head(lower_bound);
     // move aside divergent items
-    mempool::osd::list<pg_log_entry_t> divergent;
-    while (!log.empty()) {
-      pg_log_entry_t &oe = *log.log.rbegin();
-      /*
-       * look at eversion.version here.  we want to avoid a situation like:
-       *  our log: 100'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529
-       *  new log: 122'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529
-       *  lower_bound = 100'9
-       * i.e, same request, different version.  If the eversion.version is > the
-       * lower_bound, we it is divergent.
-       */
-      if (oe.version.version <= lower_bound.version)
-       break;
+    for (auto &&oe: divergent) {
       dout(10) << "merge_log divergent " << oe << dendl;
-      divergent.push_front(oe);
-      log.log.pop_back();
     }
+    log.roll_forward_to(log.head, rollbacker);
 
-    mempool::osd::list<pg_log_entry_t> entries;
-    entries.splice(entries.end(), olog.log, from, to);
+    mempool::osd::list<pg_log_entry_t> new_entries;
+    new_entries.splice(new_entries.end(), olog.log, from, to);
     append_log_entries_update_missing(
       info.last_backfill,
       info.last_backfill_bitwise,
-      entries,
+      new_entries,
+      false,
       &log,
       missing,
       rollbacker,
       this);
-    log.index();   
-
-    info.last_update = log.head = olog.head;
-
-    info.last_user_version = oinfo.last_user_version;
-    info.purged_snaps = oinfo.purged_snaps;
 
     _merge_divergent_entries(
       log,
       divergent,
       info,
-      log.can_rollback_to,
+      log.get_can_rollback_to(),
       missing,
       rollbacker,
       this);
 
+    info.last_update = log.head = olog.head;
+
     // We cannot rollback into the new log entries
-    log.can_rollback_to = log.head;
+    log.skip_can_rollback_to_to_head();
+
+    info.last_user_version = oinfo.last_user_version;
+    info.purged_snaps = oinfo.purged_snaps;
 
     changed = true;
   }
@@ -659,8 +567,12 @@ void PGLog::_write_log_and_missing_wo_missing(
     ::encode(divergent_priors, (*km)["divergent_priors"]);
   }
   if (require_rollback) {
-  ::encode(log.can_rollback_to, (*km)["can_rollback_to"]);
-  ::encode(log.rollback_info_trimmed_to, (*km)["rollback_info_trimmed_to"]);
+    ::encode(
+      log.get_can_rollback_to(),
+      (*km)["can_rollback_to"]);
+    ::encode(
+      log.get_rollback_info_trimmed_to(),
+      (*km)["rollback_info_trimmed_to"]);
   }
 
   if (!to_remove.empty())
@@ -753,8 +665,12 @@ void PGLog::_write_log_and_missing(
       }
     });
   if (require_rollback) {
-    ::encode(log.can_rollback_to, (*km)["can_rollback_to"]);
-    ::encode(log.rollback_info_trimmed_to, (*km)["rollback_info_trimmed_to"]);
+    ::encode(
+      log.get_can_rollback_to(),
+      (*km)["can_rollback_to"]);
+    ::encode(
+      log.get_rollback_info_trimmed_to(),
+      (*km)["rollback_info_trimmed_to"]);
   }
 
   if (!to_remove.empty())
index 864866f98f013a8d9997fe9c18ddff7cfa9482a1..786dda6a29b753b1e9341d0570e2c6176347b2a4 100644 (file)
@@ -50,13 +50,13 @@ struct PGLog : DoutPrefixProvider {
       const pg_log_entry_t &entry) = 0;
     virtual void rollforward(
       const pg_log_entry_t &entry) = 0;
+    virtual void trim(
+      const pg_log_entry_t &entry) = 0;
     virtual void remove(
       const hobject_t &hoid) = 0;
     virtual void try_stash(
       const hobject_t &hoid,
       version_t v) = 0;
-    virtual void trim(
-      const pg_log_entry_t &entry) = 0;
     virtual ~LogEntryHandler() {}
   };
 
@@ -73,6 +73,7 @@ struct PGLog : DoutPrefixProvider {
     char buf[512];
   };
 
+public:
   /**
    * IndexLog - adds in-memory index of the log, by oid.
    * plus some methods to manipulate it all.
@@ -83,12 +84,12 @@ struct PGLog : DoutPrefixProvider {
     mutable ceph::unordered_multimap<osd_reqid_t,pg_log_entry_t*> extra_caller_ops;
 
     // recovery pointers
-    list<pg_log_entry_t>::iterator complete_to;  // not inclusive of referenced item
-    version_t last_requested;           // last object requested by primary
+    list<pg_log_entry_t>::iterator complete_to; // not inclusive of referenced item
+    version_t last_requested = 0;               // last object requested by primary
 
     //
   private:
-    mutable __u16 indexed_data;
+    mutable __u16 indexed_data = 0;
     /**
      * rollback_info_trimmed_to_riter points to the first log entry <=
      * rollback_info_trimmed_to
@@ -96,11 +97,13 @@ struct PGLog : DoutPrefixProvider {
      * It's a reverse_iterator because rend() is a natural representation for
      * tail, and rbegin() works nicely for head.
      */
-    list<pg_log_entry_t>::reverse_iterator rollback_info_trimmed_to_riter;
-  public:
+    mempool::osd::list<pg_log_entry_t>::reverse_iterator
+      rollback_info_trimmed_to_riter;
+
     template <typename F>
     void advance_can_rollback_to(eversion_t to, F &&f) {
-      assert(to <= can_rollback_to);
+      if (to > can_rollback_to)
+       can_rollback_to = to;
 
       if (to > rollback_info_trimmed_to)
        rollback_info_trimmed_to = to;
@@ -114,6 +117,49 @@ struct PGLog : DoutPrefixProvider {
        f(*rollback_info_trimmed_to_riter);
       }
     }
+
+    void reset_rollback_info_trimmed_to_riter() {
+      rollback_info_trimmed_to_riter = log.rbegin();
+      while (rollback_info_trimmed_to_riter != log.rend() &&
+            rollback_info_trimmed_to_riter->version > rollback_info_trimmed_to)
+       ++rollback_info_trimmed_to_riter;
+    }
+
+    // indexes objects, caller ops and extra caller ops
+  public:
+    IndexedLog() :
+      complete_to(log.end()),
+      last_requested(0),
+      indexed_data(0),
+      rollback_info_trimmed_to_riter(log.rbegin())
+      {}
+
+    template <typename... Args>
+    IndexedLog(Args&&... args) :
+      pg_log_t(std::forward<Args>(args)...),
+      complete_to(log.end()),
+      last_requested(0),
+      indexed_data(0),
+      rollback_info_trimmed_to_riter(log.rbegin()) {
+      reset_rollback_info_trimmed_to_riter();
+      index();
+    }
+
+    IndexedLog(const IndexedLog &rhs) :
+      pg_log_t(rhs),
+      complete_to(log.end()),
+      last_requested(rhs.last_requested),
+      indexed_data(0),
+      rollback_info_trimmed_to_riter(log.rbegin()) {
+      reset_rollback_info_trimmed_to_riter();
+      index(rhs.indexed_data);
+    }
+    IndexedLog &operator=(const IndexedLog &rhs) {
+      this->~IndexedLog();
+      new (this) IndexedLog(rhs);
+      return *this;
+    }
+
     void trim_rollback_info_to(eversion_t to, LogEntryHandler *h) {
       advance_can_rollback_to(
        to,
@@ -129,30 +175,32 @@ struct PGLog : DoutPrefixProvider {
        });
     }
 
-    /****/
-    IndexedLog() :
-      complete_to(log.end()),
-      last_requested(0),
-      indexed_data(0),
-      rollback_info_trimmed_to_riter(log.rbegin())
-      {}
+    void skip_can_rollback_to_to_head() {
+      advance_can_rollback_to(head, [&](const pg_log_entry_t &entry) {});
+    }
 
+    mempool::osd::list<pg_log_entry_t> rewind_from_head(eversion_t newhead) {
+      auto divergent = pg_log_t::rewind_from_head(newhead);
+      index();
+      reset_rollback_info_trimmed_to_riter();
+      return divergent;
+    }
+
+    /****/
     void claim_log_and_clear_rollback_info(const pg_log_t& o) {
       // we must have already trimmed the old entries
       assert(rollback_info_trimmed_to == head);
       assert(rollback_info_trimmed_to_riter == log.rbegin());
 
-      log = o.log;
-      head = o.head;
-      rollback_info_trimmed_to = head;
-      tail = o.tail;
+      *this = IndexedLog(o);
+
+      skip_can_rollback_to_to_head();
       index();
     }
 
-    void split_into(
+    IndexedLog split_out_child(
       pg_t child_pgid,
-      unsigned split_bits,
-      IndexedLog *olog);
+      unsigned split_bits);
 
     void zero() {
       // we must have already trimmed the old entries
@@ -165,8 +213,7 @@ struct PGLog : DoutPrefixProvider {
       reset_recovery_pointers();
     }
     void clear() {
-      rollback_info_trimmed_to = head;
-      rollback_info_trimmed_to_riter = log.rbegin();
+      skip_can_rollback_to_to_head();
       zero();
     }
     void reset_recovery_pointers() {
@@ -264,85 +311,53 @@ struct PGLog : DoutPrefixProvider {
       }
     }
     
-    void reset_rollback_info_trimmed_to_riter() {
-      rollback_info_trimmed_to_riter = log.rbegin();
-      while (rollback_info_trimmed_to_riter != log.rend() &&
-            rollback_info_trimmed_to_riter->version > rollback_info_trimmed_to)
-       ++rollback_info_trimmed_to_riter;
-    }
+    void index(__u16 to_index = PGLOG_INDEXED_ALL) const {
+      if (to_index & PGLOG_INDEXED_OBJECTS)
+       objects.clear();
+      if (to_index & PGLOG_INDEXED_CALLER_OPS)
+       caller_ops.clear();
+      if (to_index & PGLOG_INDEXED_EXTRA_CALLER_OPS)
+       extra_caller_ops.clear();
 
-    // indexes objects, caller ops and extra caller ops
-    void index() {
-      objects.clear();
-      caller_ops.clear();
-      extra_caller_ops.clear();
-      for (list<pg_log_entry_t>::iterator i = log.begin();
-             i != log.end();
-             ++i) {
-       if (i->object_is_indexed()) {
-         objects[i->soid] = &(*i);
+      for (list<pg_log_entry_t>::const_iterator i = log.begin();
+          i != log.end();
+          ++i) {
+       if (to_index & PGLOG_INDEXED_OBJECTS) {
+         if (i->object_is_indexed()) {
+           objects[i->soid] = const_cast<pg_log_entry_t*>(&(*i));
+         }
        }
 
-        if (i->reqid_is_indexed()) {
-        //assert(caller_ops.count(i->reqid) == 0);  // divergent merge_log indexes new before unindexing old
-          caller_ops[i->reqid] = &(*i);
-        }
+       if (to_index & PGLOG_INDEXED_CALLER_OPS) {
+         if (i->reqid_is_indexed()) {
+           caller_ops[i->reqid] = const_cast<pg_log_entry_t*>(&(*i));
+         }
+       }
         
-        for (vector<pair<osd_reqid_t, version_t> >::const_iterator j =
-              i->extra_reqids.begin();
-              j != i->extra_reqids.end();
-              ++j) {
-            extra_caller_ops.insert(make_pair(j->first, &(*i)));
-        }
+       if (to_index & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
+         for (vector<pair<osd_reqid_t, version_t> >::const_iterator j =
+                i->extra_reqids.begin();
+              j != i->extra_reqids.end();
+              ++j) {
+            extra_caller_ops.insert(
+             make_pair(j->first, const_cast<pg_log_entry_t*>(&(*i))));
+         }
+       }
       }
         
-      indexed_data = PGLOG_INDEXED_ALL;
-      reset_rollback_info_trimmed_to_riter();
+      indexed_data |= to_index;
     }
 
     void index_objects() const {
-      objects.clear();
-      for (list<pg_log_entry_t>::const_iterator i = log.begin();
-            i != log.end();
-            ++i) {
-       if (i->object_is_indexed()) {
-         objects[i->soid] = const_cast<pg_log_entry_t*>(&(*i));
-       }
-       }
-      indexed_data |= PGLOG_INDEXED_OBJECTS;
+      index(PGLOG_INDEXED_OBJECTS);
     }
 
     void index_caller_ops() const {
-      caller_ops.clear();
-      for (list<pg_log_entry_t>::const_iterator i = log.begin();
-             i != log.end();
-             ++i) {
-               
-        if (i->reqid_is_indexed()) {
-        //assert(caller_ops.count(i->reqid) == 0);  // divergent merge_log indexes new before unindexing old
-          caller_ops[i->reqid] = const_cast<pg_log_entry_t*>(&(*i));
-        }        
-      }
-        
-      indexed_data |= PGLOG_INDEXED_CALLER_OPS;
+      index(PGLOG_INDEXED_CALLER_OPS);
     }
 
     void index_extra_caller_ops() const {
-      extra_caller_ops.clear();
-      for (list<pg_log_entry_t>::const_iterator i = log.begin();
-             i != log.end();
-             ++i) {
-               
-        for (vector<pair<osd_reqid_t, version_t> >::const_iterator j =
-              i->extra_reqids.begin();
-              j != i->extra_reqids.end();
-              ++j) {
-            extra_caller_ops.insert(make_pair(j->first, const_cast<pg_log_entry_t*>(&(*i))));
-        }
-      }
-        
-      indexed_data |= PGLOG_INDEXED_EXTRA_CALLER_OPS;        
+      index(PGLOG_INDEXED_EXTRA_CALLER_OPS);
     }
 
     void index(pg_log_entry_t& e) {
@@ -352,17 +367,17 @@ struct PGLog : DoutPrefixProvider {
           objects[e.soid] = &e;
       }
       if (indexed_data & PGLOG_INDEXED_CALLER_OPS) {
+       // divergent merge_log indexes new before unindexing old
         if (e.reqid_is_indexed()) {
-    //assert(caller_ops.count(i->reqid) == 0);  // divergent merge_log indexes new before unindexing old
-    caller_ops[e.reqid] = &e;
+         caller_ops[e.reqid] = &e;
         }
       }
       if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
         for (vector<pair<osd_reqid_t, version_t> >::const_iterator j =
-         e.extra_reqids.begin();
-       j != e.extra_reqids.end();
-       ++j) {
-    extra_caller_ops.insert(make_pair(j->first, &e));
+              e.extra_reqids.begin();
+            j != e.extra_reqids.end();
+            ++j) {
+         extra_caller_ops.insert(make_pair(j->first, &e));
         }
       }
     }
@@ -380,18 +395,18 @@ struct PGLog : DoutPrefixProvider {
       }
       if (e.reqid_is_indexed()) {
         if (indexed_data & PGLOG_INDEXED_CALLER_OPS) {
-          if (caller_ops.count(e.reqid) &&  // divergent merge_log indexes new before unindexing old
-              caller_ops[e.reqid] == &e)
+         // divergent merge_log indexes new before unindexing old
+          if (caller_ops.count(e.reqid) && caller_ops[e.reqid] == &e)
             caller_ops.erase(e.reqid);    
         }
       }
       if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
         for (vector<pair<osd_reqid_t, version_t> >::const_iterator j =
-             e.extra_reqids.begin();
+              e.extra_reqids.begin();
              j != e.extra_reqids.end();
              ++j) {
           for (ceph::unordered_multimap<osd_reqid_t,pg_log_entry_t*>::iterator k =
-               extra_caller_ops.find(j->first);
+                extra_caller_ops.find(j->first);
                k != extra_caller_ops.end() && k->first == j->first;
                ++k) {
             if (k->second == &e) {
@@ -408,12 +423,6 @@ struct PGLog : DoutPrefixProvider {
       // add to log
       log.push_back(e);
 
-      /**
-       * Make sure we don't keep around more than we need to in the
-       * in-memory log
-       */
-      log.back().mod_desc.trim_bl();
-
       // riter previously pointed to the previous entry
       if (rollback_info_trimmed_to_riter == log.rbegin())
        ++rollback_info_trimmed_to_riter;
@@ -428,28 +437,25 @@ struct PGLog : DoutPrefixProvider {
       }
       if (indexed_data & PGLOG_INDEXED_CALLER_OPS) {
         if (e.reqid_is_indexed()) {
-    caller_ops[e.reqid] = &(log.back());
+         caller_ops[e.reqid] = &(log.back());
         }
       }
       
       if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) {
         for (vector<pair<osd_reqid_t, version_t> >::const_iterator j =
-         e.extra_reqids.begin();
-       j != e.extra_reqids.end();
-       ++j) {
-    extra_caller_ops.insert(make_pair(j->first, &(log.back())));
+              e.extra_reqids.begin();
+            j != e.extra_reqids.end();
+            ++j) {
+         extra_caller_ops.insert(make_pair(j->first, &(log.back())));
         }
       }
     }
 
     void trim(
-      LogEntryHandler *handler,
       eversion_t s,
       set<eversion_t> *trimmed);
 
     ostream& print(ostream& out) const;
-
-    void filter_log(spg_t pgid, const OSDMap &map, const string &hit_set_namespace);
   };
 
 
@@ -583,9 +589,9 @@ public:
 
   void unindex() { log.unindex(); }
 
-  void add(const pg_log_entry_t& e) {
+  void add(const pg_log_entry_t& e, bool applied = true) {
     mark_writeout_from(e.version);
-    log.add(e);
+    log.add(e, applied);
   }
 
   void reset_recovery_pointers() { log.reset_recovery_pointers(); }
@@ -595,22 +601,19 @@ public:
     ObjectStore::Transaction *t);
 
   void trim(
-    LogEntryHandler *handler,
     eversion_t trim_to,
     pg_info_t &info);
 
   void roll_forward_to(
     eversion_t roll_forward_to,
     LogEntryHandler *h) {
-    if (roll_forward_to > log.can_rollback_to)
-      log.can_rollback_to = roll_forward_to;
     log.roll_forward_to(
       roll_forward_to,
       h);
   }
 
-  eversion_t get_rollback_trimmed_to() const {
-    return log.rollback_info_trimmed_to;
+  eversion_t get_can_rollback_to() const {
+    return log.get_can_rollback_to();
   }
 
   void roll_forward(LogEntryHandler *h) {
@@ -622,7 +625,6 @@ public:
   //////////////////// get or set log & missing ////////////////////
 
   void reset_backfill_claim_log(const pg_log_t &o, LogEntryHandler *h) {
-    log.can_rollback_to = log.head;
     log.trim_rollback_info_to(log.head, h);
     log.claim_log_and_clear_rollback_info(o);
     missing.clear();
@@ -633,7 +635,7 @@ public:
       pg_t child_pgid,
       unsigned split_bits,
       PGLog *opg_log) { 
-    log.split_into(child_pgid, split_bits, &(opg_log->log));
+    opg_log->log = log.split_out_child(child_pgid, split_bits);
     missing.split_into(child_pgid, split_bits, &(opg_log->missing));
     opg_log->mark_dirty_to(eversion_t::max());
     mark_dirty_to(eversion_t::max());
@@ -659,8 +661,7 @@ public:
       }
     }
 
-    if (log.can_rollback_to < v)
-      log.can_rollback_to = v;
+    assert(log.get_can_rollback_to() >= v);
   }
 
   void activate_not_complete(pg_info_t &info) {
@@ -749,9 +750,6 @@ protected:
        assert(i->prior_version == last);
       }
       last = i->version;
-
-      if (rollbacker)
-       rollbacker->trim(*i);
     }
 
     const eversion_t prior_version = entries.begin()->prior_version;
@@ -771,11 +769,11 @@ protected:
     if (objiter != log.objects.end() &&
        objiter->second->version >= first_divergent_update) {
       /// Case 1)
-      assert(objiter->second->version > last_divergent_update);
-
       ldpp_dout(dpp, 10) << __func__ << ": more recent entry found: "
                         << *objiter->second << ", already merged" << dendl;
 
+      assert(objiter->second->version > last_divergent_update);
+
       // ensure missing has been updated appropriately
       if (objiter->second->is_update()) {
        assert(missing.is_missing(hoid) &&
@@ -784,8 +782,14 @@ protected:
        assert(!missing.is_missing(hoid));
       }
       missing.revise_have(hoid, eversion_t());
-      if (rollbacker && !object_not_in_store)
-       rollbacker->remove(hoid);
+      if (rollbacker) {
+       if (!object_not_in_store) {
+         rollbacker->remove(hoid);
+       }
+       for (auto &&i: entries) {
+         rollbacker->trim(i);
+       }
+      }
       return;
     }
 
@@ -799,8 +803,14 @@ protected:
                         << dendl;
       if (missing.is_missing(hoid))
        missing.rm(missing.get_items().find(hoid));
-      if (rollbacker && !object_not_in_store)
-       rollbacker->remove(hoid);
+      if (rollbacker) {
+       if (!object_not_in_store) {
+         rollbacker->remove(hoid);
+       }
+       for (auto &&i: entries) {
+         rollbacker->trim(i);
+       }
+      }
       return;
     }
 
@@ -827,6 +837,11 @@ protected:
                             << info.log_tail << dendl;
        }
       }
+      if (rollbacker) {
+       for (auto &&i: entries) {
+         rollbacker->trim(i);
+       }
+      }
       return;
     }
 
@@ -839,7 +854,7 @@ protected:
     for (list<pg_log_entry_t>::const_reverse_iterator i = entries.rbegin();
         i != entries.rend();
         ++i) {
-      if (!i->mod_desc.can_rollback() || i->version <= olog_can_rollback_to) {
+      if (!i->can_rollback() || i->version <= olog_can_rollback_to) {
        ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid << " cannot rollback "
                           << *i << dendl;
        can_rollback = false;
@@ -852,7 +867,7 @@ protected:
       for (list<pg_log_entry_t>::const_reverse_iterator i = entries.rbegin();
           i != entries.rend();
           ++i) {
-       assert(i->mod_desc.can_rollback() && i->version > olog_can_rollback_to);
+       assert(i->can_rollback() && i->version > olog_can_rollback_to);
        ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
                           << " rolling back " << *i << dendl;
        if (rollbacker)
@@ -865,8 +880,13 @@ protected:
       /// Case 5)
       ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid << " cannot roll back, "
                         << "removing and adding to missing" << dendl;
-      if (rollbacker && !object_not_in_store)
-       rollbacker->remove(hoid);
+      if (rollbacker) {
+       if (!object_not_in_store)
+         rollbacker->remove(hoid);
+       for (auto &&i: entries) {
+         rollbacker->trim(i);
+       }
+      }
       missing.add(hoid, prior_version, eversion_t());
       if (prior_version <= info.log_tail) {
        ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
@@ -921,7 +941,7 @@ protected:
       oe.soid,
       entries,
       info,
-      log.can_rollback_to,
+      log.get_can_rollback_to(),
       missing,
       rollbacker,
       this);
@@ -941,6 +961,7 @@ public:
     const hobject_t &last_backfill,
     bool last_backfill_bitwise,
     const mempool::osd::list<pg_log_entry_t> &entries,
+    bool maintain_rollback,
     IndexedLog *log,
     missing_type &missing,
     LogEntryHandler *rollbacker,
@@ -948,24 +969,21 @@ public:
     bool invalidate_stats = false;
     if (log && !entries.empty()) {
       assert(log->head < entries.begin()->version);
-      log->head = entries.rbegin()->version;
     }
     for (list<pg_log_entry_t>::const_iterator p = entries.begin();
         p != entries.end();
         ++p) {
       invalidate_stats = invalidate_stats || !p->is_error();
       if (log) {
-       log->log.push_back(*p);
-       pg_log_entry_t &ne = log->log.back();
-       ldpp_dout(dpp, 20) << "update missing, append " << ne << dendl;
-       log->index(ne);
+       ldpp_dout(dpp, 20) << "update missing, append " << *p << dendl;
+       log->add(*p);
       }
       if (cmp(p->soid, last_backfill, last_backfill_bitwise) <= 0 &&
          !p->is_error()) {
        missing.add_next_event(*p);
        if (rollbacker) {
          // hack to match PG::mark_all_unfound_lost
-         if (p->is_lost_delete() && p->mod_desc.can_rollback()) {
+         if (maintain_rollback && p->is_lost_delete() && p->can_rollback()) {
            rollbacker->try_stash(p->soid, p->version.version);
          } else if (p->is_delete()) {
            rollbacker->remove(p->soid);
@@ -973,8 +991,6 @@ public:
        }
       }
     }
-    if (log)
-      log->reset_rollback_info_trimmed_to_riter();
     return invalidate_stats;
   }
   bool append_new_log_entries(
@@ -986,6 +1002,7 @@ public:
       last_backfill,
       last_backfill_bitwise,
       entries,
+      true,
       &log,
       missing,
       rollbacker,
@@ -1087,13 +1104,13 @@ public:
     assert(r == 0);
     assert(st.st_size == 0);
 
-    log.tail = info.log_tail;
     // will get overridden below if it had been recorded
-    log.can_rollback_to = info.last_update;
-    log.rollback_info_trimmed_to = eversion_t();
+    eversion_t on_disk_can_rollback_to = info.last_update;
+    eversion_t on_disk_rollback_info_trimmed_to = eversion_t();
     ObjectMap::ObjectMapIterator p = store->get_omap_iterator(log_coll, log_oid);
     map<eversion_t, hobject_t> divergent_priors;
     bool has_divergent_priors = false;
+    list<pg_log_entry_t> entries;
     if (p) {
       for (p->seek_to_first(); p->valid() ; p->next(false)) {
        // non-log pgmeta_oid keys are prefixed with _; skip those
@@ -1108,9 +1125,9 @@ public:
          has_divergent_priors = true;
          debug_verify_stored_missing = false;
        } else if (p->key() == "can_rollback_to") {
-         ::decode(log.can_rollback_to, bp);
+         ::decode(on_disk_can_rollback_to, bp);
        } else if (p->key() == "rollback_info_trimmed_to") {
-         ::decode(log.rollback_info_trimmed_to, bp);
+         ::decode(on_disk_rollback_info_trimmed_to, bp);
        } else if (p->key().substr(0, 7) == string("missing")) {
          pair<hobject_t, pg_missing_item> p;
          ::decode(p, bp);
@@ -1119,20 +1136,23 @@ public:
          pg_log_entry_t e;
          e.decode_with_checksum(bp);
          ldpp_dout(dpp, 20) << "read_log_and_missing " << e << dendl;
-         if (!log.log.empty()) {
-           pg_log_entry_t last_e(log.log.back());
+         if (!entries.empty()) {
+           pg_log_entry_t last_e(entries.back());
            assert(last_e.version.version < e.version.version);
            assert(last_e.version.epoch <= e.version.epoch);
          }
-         log.log.push_back(e);
-         log.head = e.version;
+         entries.push_back(e);
          if (log_keys_debug)
            log_keys_debug->insert(e.get_key_name());
        }
       }
     }
-    log.head = info.last_update;
-    log.reset_rollback_info_trimmed_to_riter();
+    log = IndexedLog(
+      info.last_update,
+      info.log_tail,
+      on_disk_can_rollback_to,
+      on_disk_rollback_info_trimmed_to,
+      std::move(entries));
 
     if (has_divergent_priors || debug_verify_stored_missing) {
       // build missing
@@ -1155,9 +1175,6 @@ public:
          if (did.count(i->soid)) continue;
          did.insert(i->soid);
 
-         if (i->version > log.can_rollback_to && i->is_rollforward())
-           checked.insert(i->soid);
-
          if (i->is_delete()) continue;
 
          bufferlist bv;
index a34e1401dcd7afd634db6a6d9b5d18f4cad1ce8c..795e34dae3a56b2cfc3917211365c97e45f3ce72 100644 (file)
@@ -376,6 +376,7 @@ public:
 void generate_transaction(
   PGTransactionUPtr &pgt,
   const coll_t &coll,
+  bool legacy_log_entries,
   vector<pg_log_entry_t> &log_entries,
   ObjectStore::Transaction *t,
   set<hobject_t, hobject_t::BitwiseComparator> *added,
@@ -386,7 +387,7 @@ void generate_transaction(
   assert(removed);
 
   for (auto &&le: log_entries) {
-    le.mod_desc.mark_unrollbackable();
+    le.mark_unrollbackable();
     auto oiter = pgt->op_map.find(le.soid);
     if (oiter != pgt->op_map.end() && oiter->second.updated_snaps) {
       vector<snapid_t> snaps(
@@ -540,6 +541,7 @@ void ReplicatedBackend::submit_transaction(
   generate_transaction(
     t,
     coll,
+    !get_osdmap()->test_flag(CEPH_OSDMAP_REQUIRE_KRAKEN),
     log_entries,
     &op_t,
     &added,
index a72cc797bfc5fe50944751f19c33fea2ff27b457..5e1a4a9e10eb519c14ef9755a7b8e66488345a04 100644 (file)
@@ -9834,7 +9834,7 @@ void ReplicatedPG::mark_all_unfound_lost(
          pg_log_entry_t::LOST_REVERT, oid, v,
          m->second.need, 0, osd_reqid_t(), mtime, 0);
        e.reverting_to = prev;
-       e.mod_desc.mark_unrollbackable();
+       e.mark_unrollbackable();
        log_entries.push_back(e);
        dout(10) << e << dendl;
 
@@ -9852,7 +9852,7 @@ void ReplicatedPG::mark_all_unfound_lost(
          if (pool.info.require_rollback()) {
            e.mod_desc.try_rmobject(v.version);
          } else {
-           e.mod_desc.mark_unrollbackable();
+           e.mark_unrollbackable();
          }
        } // otherwise, just do what we used to do
        dout(10) << e << dendl;
@@ -9995,9 +9995,8 @@ void ReplicatedPG::on_removal(ObjectStore::Transaction *t)
 
 
   // clear log
-  PGLogEntryHandler rollbacker;
+  PGLogEntryHandler rollbacker{this, t};
   pg_log.roll_forward(&rollbacker);
-  rollbacker.apply(this, t);
 
   write_if_dirty(*t);
 
index ad3ece3a74ad5dda118f8d59ddcd7705545bc823..7b99a78773e38a5143d5dfff0a960d47045a5d71 100644 (file)
@@ -337,6 +337,18 @@ public:
     map<string, bufferlist> &attrs) override {
     return get_object_context(hoid, true, &attrs);
   }
+  // Override that forwards to update_object_snap_mapping(); note that the
+  // transaction is passed first there.
+  void pgb_set_object_snap_mapping(
+    const hobject_t &soid,
+    const set<snapid_t> &snaps,
+    ObjectStore::Transaction *t) override {
+    update_object_snap_mapping(t, soid, snaps);
+  }
+  // Override that forwards to clear_object_snap_mapping(); note that the
+  // transaction is passed first there.
+  void pgb_clear_object_snap_mapping(
+    const hobject_t &soid,
+    ObjectStore::Transaction *t) override {
+    clear_object_snap_mapping(t, soid);
+  }
+
 
   void log_operation(
     const vector<pg_log_entry_t> &logv,
index 431dc05478f0b090d9e740bcb2ce1aeedcd541e1..3c3ab2fff6ca1cd6e78ccd6fe2f85b849e7e31fe 100644 (file)
@@ -2900,6 +2900,7 @@ struct pg_log_t {
   eversion_t head;    // newest entry
   eversion_t tail;    // version prior to oldest
 
+protected:
   // We can rollback rollback-able entries > can_rollback_to
   eversion_t can_rollback_to;
 
@@ -2907,16 +2908,107 @@ struct pg_log_t {
   // data can be found
   eversion_t rollback_info_trimmed_to;
 
+public:
   mempool::osd::list<pg_log_entry_t> log;  // the actual log.
   
-  pg_log_t() {}
+  pg_log_t() = default;
+  // Builds a log from explicit bounds and rollback markers, taking ownership
+  // of the entry list by move.  head is set from last_update and tail from
+  // log_tail; the parameters named like members initialise those members.
+  pg_log_t(const eversion_t &last_update,
+          const eversion_t &log_tail,
+          const eversion_t &can_rollback_to,
+          const eversion_t &rollback_info_trimmed_to,
+          mempool::osd::list<pg_log_entry_t> &&entries)
+    : head(last_update), tail(log_tail), can_rollback_to(can_rollback_to),
+      rollback_info_trimmed_to(rollback_info_trimmed_to),
+      log(std::move(entries)) {}
+  // Same as the constructor taking a mempool list, but the entries come from
+  // a plain std::list and are copied, in order, into the log.
+  pg_log_t(const eversion_t &last_update,
+          const eversion_t &log_tail,
+          const eversion_t &can_rollback_to,
+          const eversion_t &rollback_info_trimmed_to,
+          const std::list<pg_log_entry_t> &entries)
+    : head(last_update), tail(log_tail), can_rollback_to(can_rollback_to),
+      rollback_info_trimmed_to(rollback_info_trimmed_to) {
+    log.insert(log.end(), entries.begin(), entries.end());
+  }
 
   void clear() {
     eversion_t z;
-    can_rollback_to = head = tail = z;
+    rollback_info_trimmed_to = can_rollback_to = head = tail = z;
     log.clear();
   }
 
+  // Read-only accessor for the protected rollback_info_trimmed_to member.
+  eversion_t get_rollback_info_trimmed_to() const {
+    return rollback_info_trimmed_to;
+  }
+  // Read-only accessor for the protected can_rollback_to member
+  // (rollback-able entries newer than this version can be rolled back).
+  eversion_t get_can_rollback_to() const {
+    return can_rollback_to;
+  }
+
+
+  /**
+   * Partition this log's entries between this log and a new child log.
+   *
+   * An entry whose object hash, masked to its low split_bits bits, equals
+   * child_pgid.m_seed goes to the returned log; every other entry stays in
+   * this log.  Relative order is preserved on both sides.  The returned log
+   * is constructed with this log's head, tail, can_rollback_to and
+   * rollback_info_trimmed_to.
+   *
+   * @param child_pgid  pgid whose m_seed selects the child's entries
+   * @param split_bits  number of low hash bits compared against the seed;
+   *                    assumed to be smaller than the bit width of unsigned
+   *                    -- TODO confirm with callers
+   * @return the log holding the child's entries
+   */
+  pg_log_t split_out_child(pg_t child_pgid, unsigned split_bits) {
+    mempool::osd::list<pg_log_entry_t> oldlog, childlog;
+    oldlog.swap(log);
+
+    // Build the mask from an unsigned constant: left-shifting the signed
+    // value ~0 (i.e. -1) is undefined behaviour before C++20.
+    const unsigned mask = ~((~0U) << split_bits);
+    for (auto i = oldlog.begin(); i != oldlog.end(); ) {
+      auto cur = i++;
+      const bool to_child =
+       (cur->soid.get_hash() & mask) == child_pgid.m_seed;
+      auto &dest = to_child ? childlog : log;
+      // relink the node into its destination instead of copying the entry
+      dest.splice(dest.end(), oldlog, cur);
+    }
+
+    return pg_log_t(
+      head,
+      tail,
+      can_rollback_to,
+      rollback_info_trimmed_to,
+      std::move(childlog));
+  }
+
+  // Truncates the log so that it ends at newhead and returns the removed
+  // (divergent) entries in their original order.  newhead must not be older
+  // than tail.  head is set to newhead, and can_rollback_to and
+  // rollback_info_trimmed_to are clamped so neither exceeds the new head.
+  mempool::osd::list<pg_log_entry_t> rewind_from_head(eversion_t newhead) {
+    assert(newhead >= tail);
+
+    // walk backwards from the newest entry looking for the cut point
+    mempool::osd::list<pg_log_entry_t>::iterator p = log.end();
+    mempool::osd::list<pg_log_entry_t> divergent;
+    while (true) {
+      if (p == log.begin()) {
+       // yikes, the whole thing is divergent!
+       ::swap(divergent, log);
+       break;
+      }
+      --p;
+      if (p->version.version <= newhead.version) {
+       /*
+        * look at eversion.version here.  we want to avoid a situation like:
+        *  our log: 100'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529
+        *  new log: 122'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529
+        *  lower_bound = 100'9
+        * i.e., same request, different version.  If the eversion.version is > the
+        * lower_bound, it is divergent.
+        */
+       // p is the last entry to keep; everything after it is divergent
+       ++p;
+       divergent.splice(divergent.begin(), log, p, log.end());
+       break;
+      }
+      assert(p->version > newhead);
+    }
+    head = newhead;
+
+    if (can_rollback_to > newhead)
+      can_rollback_to = newhead;
+
+    if (rollback_info_trimmed_to > newhead)
+      rollback_info_trimmed_to = newhead;
+
+    return divergent;
+  }
+
   bool empty() const {
     return log.empty();
   }
@@ -2970,7 +3062,7 @@ WRITE_CLASS_ENCODER(pg_log_t)
 inline ostream& operator<<(ostream& out, const pg_log_t& log) 
 {
   out << "log((" << log.tail << "," << log.head << "], crt="
-      << log.can_rollback_to << ")";
+      << log.get_can_rollback_to() << ")";
   return out;
 }
 
index 035c489e67728251bbbfdfed18dd1b589f90119b..4c2bb06793a23ea43b40ef65bb06ed8f1665ba6b 100644 (file)
@@ -1972,19 +1972,9 @@ TEST_F(PGLogTest, filter_log_1) {
 
     const string hit_set_namespace("internal");
 
-    ObjectStore::Transaction t;
-    pg_info_t info;
-    list<hobject_t> remove_snap;
-    //bool dirty_info = false;
-    //bool dirty_big_info = false;
-
-    hobject_t divergent_object;
-    eversion_t divergent_version;
-    eversion_t prior_version;
-    eversion_t newhead;
     {
       pg_log_entry_t e;
-      e.mod_desc.mark_unrollbackable();
+      e.mark_unrollbackable();
       e.op = pg_log_entry_t::MODIFY;
       e.soid.pool = pool_id;
 
@@ -2024,12 +2014,22 @@ TEST_F(PGLogTest, filter_log_1) {
     ASSERT_EQ(total, num_objects);
 
     // Some should be removed
-    log.filter_log(pgid, *osdmap, hit_set_namespace);
+    {
+      pg_log_t filtered, reject;
+      pg_log_t::filter_log(
+       pgid, *osdmap, hit_set_namespace, log, filtered, reject);
+      log = IndexedLog(filtered);
+    }
     EXPECT_LE(log.log.size(), (size_t)total);
 
     // If we filter a second time, there should be the same total
     total = log.log.size();
-    log.filter_log(pgid, *osdmap, hit_set_namespace);
+    {
+      pg_log_t filtered, reject;
+      pg_log_t::filter_log(
+       pgid, *osdmap, hit_set_namespace, log, filtered, reject);
+      log = IndexedLog(filtered);
+    }
     EXPECT_EQ(log.log.size(), (size_t)total);
 
     // Increase pg_num as if there would be a split
@@ -2046,7 +2046,12 @@ TEST_F(PGLogTest, filter_log_1) {
     ASSERT_EQ(ret, 0);
 
     // We should have fewer entries after a filter
-    log.filter_log(pgid, *osdmap, hit_set_namespace);
+    {
+      pg_log_t filtered, reject;
+      pg_log_t::filter_log(
+       pgid, *osdmap, hit_set_namespace, log, filtered, reject);
+      log = IndexedLog(filtered);
+    }
     EXPECT_LE(log.log.size(), (size_t)total);
 
     // Make sure all internal entries are retained