git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Include dups in copy_after() and copy_up_to()
author David Zafman <dzafman@redhat.com>
Thu, 2 May 2019 02:47:55 +0000 (19:47 -0700)
committer David Zafman <dzafman@redhat.com>
Thu, 28 Mar 2019 08:27:04 +0000 (08:27 +0000)
A client saw out-of-order results in a test with a very small
pg_log, because backfill/recovery sent only a small number of
log entries for duplicate checks.

Since these copy operations are equivalent to both a copy
and a trim (an argument controls how many log entries are
transferred), we need to include any new dups together with
enough existing dups, up to the configured maximum.

Fixes: http://tracker.ceph.com/issues/39304
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit 00f279db37fc648584c4c741e0453b1d4f4320e4)

Conflicts:
src/osd/PeeringState.cc contents in src/osd/PG.cc
src/osd/osd_types.cc (trivial)

src/osd/PG.cc
src/osd/osd_types.cc
src/osd/osd_types.h

index 98758ef1a40b3497a5bec0ad0445887d11cd7066..9634b779e852d3cc9bdd05c126d42f9af3803ae6 100644 (file)
@@ -1792,7 +1792,7 @@ void PG::activate(ObjectStore::Transaction& t,
          get_osdmap()->get_epoch(), pi);
 
        // send some recent log, so that op dup detection works well.
-       m->log.copy_up_to(pg_log.get_log(), cct->_conf->osd_min_pg_log_entries);
+       m->log.copy_up_to(cct, pg_log.get_log(), cct->_conf->osd_min_pg_log_entries);
        m->info.log_tail = m->log.tail;
        pi.log_tail = m->log.tail;  // sigh...
 
@@ -1804,7 +1804,7 @@ void PG::activate(ObjectStore::Transaction& t,
          i->shard, pg_whoami.shard,
          get_osdmap()->get_epoch(), info);
        // send new stuff to append to replicas log
-       m->log.copy_after(pg_log.get_log(), pi.last_update);
+       m->log.copy_after(cct, pg_log.get_log(), pi.last_update);
       }
 
       // share past_intervals if we are creating the pg on the replica
@@ -5666,7 +5666,7 @@ void PG::fulfill_log(
                        << ", sending full log instead";
       mlog->log = pg_log.get_log();           // primary should not have requested this!!
     } else
-      mlog->log.copy_after(pg_log.get_log(), query.since);
+      mlog->log.copy_after(cct, pg_log.get_log(), query.since);
   }
   else if (query.type == pg_query_t::FULLLOG) {
     dout(10) << " sending info+missing+full log" << dendl;
index b8acccc17dfdfa028cf1f90680de7871f30475b2..99c66713177ea75cdc6548c54d2a8d1530bbca11 100644 (file)
@@ -4339,11 +4339,41 @@ void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
     o.back()->log.push_back(**p);
 }
 
-void pg_log_t::copy_after(const pg_log_t &other, eversion_t v) 
+static void _handle_dups(CephContext* cct, pg_log_t &target, const pg_log_t &other, unsigned maxdups)
+{
+  auto earliest_dup_version =
+               target.head.version < maxdups ? 0u : target.head.version - maxdups + 1;
+  lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl;
+
+  for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) {
+    if (d->version.version >= earliest_dup_version) {
+      lgeneric_subdout(cct, osd, 20)
+             << "copy_up_to/copy_after copy dup version "
+             << d->version << dendl;
+      target.dups.push_back(pg_log_dup_t(*d));
+    }
+  }
+
+  for (auto i = other.log.cbegin(); i != other.log.cend(); ++i) {
+    ceph_assert(i->version > other.tail);
+    if (i->version > target.tail)
+      break;
+    if (i->version.version >= earliest_dup_version) {
+      lgeneric_subdout(cct, osd, 20)
+               << "copy_up_to/copy_after copy dup from log version "
+               << i->version << dendl;
+      target.dups.push_back(pg_log_dup_t(*i));
+    }
+  }
+}
+
+
+void pg_log_t::copy_after(CephContext* cct, const pg_log_t &other, eversion_t v)
 {
   can_rollback_to = other.can_rollback_to;
   head = other.head;
   tail = other.tail;
+  lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl;
   for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
        i != other.log.rend();
        ++i) {
@@ -4353,25 +4383,31 @@ void pg_log_t::copy_after(const pg_log_t &other, eversion_t v)
       tail = i->version;
       break;
     }
+    lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
     log.push_front(*i);
   }
+  _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
 }
 
-void pg_log_t::copy_up_to(const pg_log_t &other, int max)
+void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max)
 {
   can_rollback_to = other.can_rollback_to;
   int n = 0;
   head = other.head;
   tail = other.tail;
+  lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl;
   for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
        i != other.log.rend();
        ++i) {
+    ceph_assert(i->version > other.tail);
     if (n++ >= max) {
       tail = i->version;
       break;
     }
+    lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
     log.push_front(*i);
   }
+  _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
 }
 
 ostream& pg_log_t::print(ostream& out) const
index ad16d59669192ad19c7647778c6184ba1ad739c5..6ac229ee431cf246b4448c53885499b1b7134c19 100644 (file)
@@ -3666,7 +3666,7 @@ public:
    * @param other pg_log_t to copy from
    * @param from copy entries after this version
    */
-  void copy_after(const pg_log_t &other, eversion_t from);
+  void copy_after(CephContext* cct, const pg_log_t &other, eversion_t from);
 
   /**
    * copy up to N entries
@@ -3674,7 +3674,7 @@ public:
    * @param other source log
    * @param max max number of entries to copy
    */
-  void copy_up_to(const pg_log_t &other, int max);
+  void copy_up_to(CephContext* cct, const pg_log_t &other, int max);
 
   ostream& print(ostream& out) const;