From d8af086af1fa35f4535066fa45a2e89d8053f34d Mon Sep 17 00:00:00 2001 From: David Zafman Date: Wed, 1 May 2019 19:47:55 -0700 Subject: [PATCH] osd: Include dups in copy_after() and copy_up_to() Client saw out of order results in a test with a very small pg_log because a backfill/recovery sent a small number of log entries for duplicate checks. Since these copy operations are equivalent to both a copy and trim (an argument controls how many log entries transfer), we need to include any new dups with enough existing dups up to the configured maximum. Fixes: http://tracker.ceph.com/issues/39304 Signed-off-by: David Zafman (cherry picked from commit 00f279db37fc648584c4c741e0453b1d4f4320e4) Conflicts: src/osd/PeeringState.cc contents in src/osd/PG.cc src/osd/osd_types.cc (trivial) --- src/osd/PG.cc | 6 +++--- src/osd/osd_types.cc | 40 ++++++++++++++++++++++++++++++++++++++-- src/osd/osd_types.h | 4 ++-- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 98758ef1a40..9634b779e85 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1792,7 +1792,7 @@ void PG::activate(ObjectStore::Transaction& t, get_osdmap()->get_epoch(), pi); // send some recent log, so that op dup detection works well. - m->log.copy_up_to(pg_log.get_log(), cct->_conf->osd_min_pg_log_entries); + m->log.copy_up_to(cct, pg_log.get_log(), cct->_conf->osd_min_pg_log_entries); m->info.log_tail = m->log.tail; pi.log_tail = m->log.tail; // sigh... @@ -1804,7 +1804,7 @@ void PG::activate(ObjectStore::Transaction& t, i->shard, pg_whoami.shard, get_osdmap()->get_epoch(), info); // send new stuff to append to replicas log - m->log.copy_after(pg_log.get_log(), pi.last_update); + m->log.copy_after(cct, pg_log.get_log(), pi.last_update); } // share past_intervals if we are creating the pg on the replica @@ -5666,7 +5666,7 @@ void PG::fulfill_log( << ", sending full log instead"; mlog->log = pg_log.get_log(); // primary should not have requested this!! 
} else - mlog->log.copy_after(pg_log.get_log(), query.since); + mlog->log.copy_after(cct, pg_log.get_log(), query.since); } else if (query.type == pg_query_t::FULLLOG) { dout(10) << " sending info+missing+full log" << dendl; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index b8acccc17df..99c66713177 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -4339,11 +4339,41 @@ void pg_log_t::generate_test_instances(list<pg_log_t*>& o) o.back()->log.push_back(**p); } -void pg_log_t::copy_after(const pg_log_t &other, eversion_t v) +static void _handle_dups(CephContext* cct, pg_log_t &target, const pg_log_t &other, unsigned maxdups) +{ + auto earliest_dup_version = + target.head.version < maxdups ? 0u : target.head.version - maxdups + 1; + lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl; + + for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) { + if (d->version.version >= earliest_dup_version) { + lgeneric_subdout(cct, osd, 20) + << "copy_up_to/copy_after copy dup version " + << d->version << dendl; + target.dups.push_back(pg_log_dup_t(*d)); + } + } + + for (auto i = other.log.cbegin(); i != other.log.cend(); ++i) { + ceph_assert(i->version > other.tail); + if (i->version > target.tail) + break; + if (i->version.version >= earliest_dup_version) { + lgeneric_subdout(cct, osd, 20) + << "copy_up_to/copy_after copy dup from log version " + << i->version << dendl; + target.dups.push_back(pg_log_dup_t(*i)); + } + } +} + + +void pg_log_t::copy_after(CephContext* cct, const pg_log_t &other, eversion_t v) { can_rollback_to = other.can_rollback_to; head = other.head; tail = other.tail; + lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl; for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin(); i != other.log.rend(); ++i) { @@ -4353,25 +4383,31 @@ void pg_log_t::copy_after(const pg_log_t &other, eversion_t v) tail = i->version; break; } + lgeneric_subdout(cct, osd, 20) << 
__func__ << " copy log version " << i->version << dendl; log.push_front(*i); } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); } -void pg_log_t::copy_up_to(const pg_log_t &other, int max) +void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max) { can_rollback_to = other.can_rollback_to; int n = 0; head = other.head; tail = other.tail; + lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl; for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin(); i != other.log.rend(); ++i) { + ceph_assert(i->version > other.tail); if (n++ >= max) { tail = i->version; break; } + lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl; log.push_front(*i); } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); } ostream& pg_log_t::print(ostream& out) const diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index ad16d596691..6ac229ee431 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -3666,7 +3666,7 @@ public: * @param other pg_log_t to copy from * @param from copy entries after this version */ - void copy_after(const pg_log_t &other, eversion_t from); + void copy_after(CephContext* cct, const pg_log_t &other, eversion_t from); /** * copy up to N entries @@ -3674,7 +3674,7 @@ public: * @param other source log * @param max max number of entries to copy */ - void copy_up_to(const pg_log_t &other, int max); + void copy_up_to(CephContext* cct, const pg_log_t &other, int max); ostream& print(ostream& out) const; -- 2.47.3