git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Include dups in copy_after() and copy_up_to()
author: David Zafman <dzafman@redhat.com>
Thu, 2 May 2019 02:47:55 +0000 (19:47 -0700)
committer: David Zafman <dzafman@redhat.com>
Fri, 10 May 2019 00:50:27 +0000 (17:50 -0700)
A client saw out-of-order results in a test with a very small
pg_log because backfill/recovery sent only a small number of
log entries for duplicate checks.

Since these copy operations are equivalent to a copy followed
by a trim (an argument controls how many log entries are
transferred), we need to include any new dups along with enough
existing dups to reach the configured maximum.

Fixes: http://tracker.ceph.com/issues/39304
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/PeeringState.cc
src/osd/osd_types.cc
src/osd/osd_types.h

index 79a07ab8b4aae46bc66e8757c0ebe1fc197b80a7..6a1b97bed8454679975f999081825c08a73d9c95 100644 (file)
@@ -2272,7 +2272,7 @@ void PeeringState::activate(
          last_peering_reset /* epoch to create pg at */);
 
        // send some recent log, so that op dup detection works well.
-       m->log.copy_up_to(pg_log.get_log(), cct->_conf->osd_min_pg_log_entries);
+       m->log.copy_up_to(cct, pg_log.get_log(), cct->_conf->osd_min_pg_log_entries);
        m->info.log_tail = m->log.tail;
        pi.log_tail = m->log.tail;  // sigh...
 
@@ -2285,7 +2285,7 @@ void PeeringState::activate(
          get_osdmap_epoch(), info,
          last_peering_reset /* epoch to create pg at */);
        // send new stuff to append to replicas log
-       m->log.copy_after(pg_log.get_log(), pi.last_update);
+       m->log.copy_after(cct, pg_log.get_log(), pi.last_update);
       }
 
       // share past_intervals if we are creating the pg on the replica
@@ -2581,7 +2581,7 @@ void PeeringState::fulfill_log(
                             << ", sending full log instead";
       mlog->log = pg_log.get_log();           // primary should not have requested this!!
     } else
-      mlog->log.copy_after(pg_log.get_log(), query.since);
+      mlog->log.copy_after(cct, pg_log.get_log(), query.since);
   }
   else if (query.type == pg_query_t::FULLLOG) {
     psdout(10) << " sending info+missing+full log" << dendl;
index 817a3a5dd7d699eacddb02015ed8abe3e372c1c2..6885e6d9f6777506d215e88e148530a939ca2527 100644 (file)
@@ -4654,11 +4654,41 @@ void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
     o.back()->log.push_back(**p);
 }
 
-void pg_log_t::copy_after(const pg_log_t &other, eversion_t v) 
+static void _handle_dups(CephContext* cct, pg_log_t &target, const pg_log_t &other, unsigned maxdups)
+{
+  auto earliest_dup_version =
+               target.head.version < maxdups ? 0u : target.head.version - maxdups + 1;
+  lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl;
+
+  for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) {
+    if (d->version.version >= earliest_dup_version) {
+      lgeneric_subdout(cct, osd, 20)
+             << "copy_up_to/copy_after copy dup version "
+             << d->version << dendl;
+      target.dups.push_back(pg_log_dup_t(*d));
+    }
+  }
+
+  for (auto i = other.log.cbegin(); i != other.log.cend(); ++i) {
+    ceph_assert(i->version > other.tail);
+    if (i->version > target.tail)
+      break;
+    if (i->version.version >= earliest_dup_version) {
+      lgeneric_subdout(cct, osd, 20)
+               << "copy_up_to/copy_after copy dup from log version "
+               << i->version << dendl;
+      target.dups.push_back(pg_log_dup_t(*i));
+    }
+  }
+}
+
+
+void pg_log_t::copy_after(CephContext* cct, const pg_log_t &other, eversion_t v)
 {
   can_rollback_to = other.can_rollback_to;
   head = other.head;
   tail = other.tail;
+  lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl;
   for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) {
     ceph_assert(i->version > other.tail);
     if (i->version <= v) {
@@ -4666,23 +4696,29 @@ void pg_log_t::copy_after(const pg_log_t &other, eversion_t v)
       tail = i->version;
       break;
     }
+    lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
     log.push_front(*i);
   }
+  _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
 }
 
-void pg_log_t::copy_up_to(const pg_log_t &other, int max)
+void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max)
 {
   can_rollback_to = other.can_rollback_to;
   int n = 0;
   head = other.head;
   tail = other.tail;
+  lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl;
   for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) {
+    ceph_assert(i->version > other.tail);
     if (n++ >= max) {
       tail = i->version;
       break;
     }
+    lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
     log.push_front(*i);
   }
+  _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
 }
 
 ostream& pg_log_t::print(ostream& out) const
index b648e65b5be47e7249fa83256fc2f1d8bdcc4cbb..9bb98448245969c31feb987d31a88de96c8da165 100644 (file)
@@ -4110,7 +4110,7 @@ public:
    * @param other pg_log_t to copy from
    * @param from copy entries after this version
    */
-  void copy_after(const pg_log_t &other, eversion_t from);
+  void copy_after(CephContext* cct, const pg_log_t &other, eversion_t from);
 
   /**
    * copy up to N entries
@@ -4118,7 +4118,7 @@ public:
    * @param other source log
    * @param max max number of entries to copy
    */
-  void copy_up_to(const pg_log_t &other, int max);
+  void copy_up_to(CephContext* cct, const pg_log_t &other, int max);
 
   std::ostream& print(std::ostream& out) const;