From 5e0ec06376f832d32a6b1af390f925a59b03798f Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 15 Nov 2016 15:47:37 -0800 Subject: [PATCH] osd/: refactor PGLog a bit and add support for rolling back extents It was hard to reason about the validity of the IndexedLog internal pointers and iterators during updates, so this patch cleans that up a bunch. It also moves responsibility for doing rollbacks into PGBackend. Finally, it adds support for the new log entry format. Signed-off-by: Samuel Just --- src/osd/OSD.cc | 4 +- src/osd/PG.cc | 59 +++--- src/osd/PG.h | 78 +------- src/osd/PGBackend.cc | 174 +++++++++++++----- src/osd/PGBackend.h | 40 ++++- src/osd/PGLog.cc | 176 +++++------------- src/osd/PGLog.h | 335 ++++++++++++++++++----------------- src/osd/ReplicatedBackend.cc | 4 +- src/osd/ReplicatedPG.cc | 7 +- src/osd/ReplicatedPG.h | 12 ++ src/osd/osd_types.h | 98 +++++++++- src/test/osd/TestPGLog.cc | 33 ++-- 12 files changed, 556 insertions(+), 464 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 4d891a493d6..91766776eb1 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -8063,9 +8063,7 @@ void OSD::handle_pg_trim(OpRequestRef op) } else { // primary is instructing us to trim ObjectStore::Transaction t; - PG::PGLogEntryHandler handler; - pg->pg_log.trim(&handler, m->trim_to, pg->info); - handler.apply(pg, &t); + pg->pg_log.trim(m->trim_to, pg->info); pg->dirty_info = true; pg->write_if_dirty(t); int tr = store->queue_transaction(pg->osr.get(), std::move(t), NULL); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index fc466a40824..73c71bbdc9b 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -435,18 +435,16 @@ void PG::update_object_snap_mapping( void PG::merge_log( ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, pg_shard_t from) { - PGLogEntryHandler rollbacker; + PGLogEntryHandler rollbacker{this, &t}; pg_log.merge_log( t, oinfo, olog, from, info, &rollbacker, dirty_info, dirty_big_info); - rollbacker.apply(this, &t); } void PG::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead) { - PGLogEntryHandler rollbacker; + PGLogEntryHandler rollbacker{this, &t}; pg_log.rewind_divergent_log( t, newhead, info, &rollbacker, dirty_info, dirty_big_info); - rollbacker.apply(this, &t); } /* @@ -1590,7 +1588,7 @@ void PG::activate(ObjectStore::Transaction& t, min_last_complete_ondisk = eversion_t(0,0); // we don't know (yet)! } last_update_applied = info.last_update; - last_rollback_info_trimmed_to_applied = pg_log.get_rollback_trimmed_to(); + last_rollback_info_trimmed_to_applied = pg_log.get_can_rollback_to(); need_up_thru = false; @@ -1848,10 +1846,12 @@ void PG::activate(ObjectStore::Transaction& t, state_set(PG_STATE_ACTIVATING); } + if (is_primary()) { + projected_last_update = info.last_update; + } if (acting.size() >= pool.info.min_size) { - PGLogEntryHandler handler; + PGLogEntryHandler handler{this, &t}; pg_log.roll_forward(&handler); - handler.apply(this, &t); } } @@ -3053,21 +3053,29 @@ void PG::append_log( } dout(10) << "append_log " << pg_log.get_log() << " " << logv << dendl; + PGLogEntryHandler handler{this, &t}; + if (!transaction_applied) { + /* We must be a backfill peer, so it's ok if we apply + * out-of-turn since we won't be considered when + * determining a min possible last_update. + */ + pg_log.roll_forward(&handler); + } + for (vector::const_iterator p = logv.begin(); p != logv.end(); ++p) { - add_log_entry(*p); - } + add_log_entry(*p, transaction_applied); - PGLogEntryHandler handler; - if (!transaction_applied) { - pg_log.roll_forward(&handler); - t.register_on_applied( - new C_UpdateLastRollbackInfoTrimmedToApplied( - this, - get_osdmap()->get_epoch(), - info.last_update)); - } else if (roll_forward_to > pg_log.get_rollback_trimmed_to()) { + /* We don't want to leave the rollforward artifacts around + * here past last_backfill. It's ok for the same reason as + * above */ + if (transaction_applied && + (cmp(p->soid, info.last_backfill, get_sort_bitwise()) > 0)) { + pg_log.roll_forward(&handler); + } + } + if (transaction_applied && roll_forward_to > pg_log.get_can_rollback_to()) { pg_log.roll_forward_to( roll_forward_to, &handler); @@ -3078,11 +3086,7 @@ void PG::append_log( roll_forward_to)); } - pg_log.trim(&handler, trim_to, info); - - dout(10) << __func__ << ": rolling forward to " << roll_forward_to - << " entries " << handler.to_trim << dendl; - handler.apply(this, &t); + pg_log.trim(trim_to, info); // update the local pg, pg log dirty_info = true; @@ -4653,13 +4657,12 @@ bool PG::append_log_entries_update_missing( assert(!entries.empty()); assert(entries.begin()->version > info.last_update); - PGLogEntryHandler rollbacker; + PGLogEntryHandler rollbacker{this, &t}; bool invalidate_stats = pg_log.append_new_log_entries(info.last_backfill, info.last_backfill_bitwise, entries, &rollbacker); - rollbacker.apply(this, &t); info.last_update = pg_log.get_head(); if (pg_log.get_missing().num_missing() == 0) { @@ -4695,6 +4698,7 @@ void PG::merge_new_log_entries( pinfo.last_backfill, info.last_backfill_bitwise, entries, + true, NULL, pmissing, NULL, @@ -5286,7 +5290,7 @@ ostream& operator<<(ostream& out, const PG& pg) if (!pg.backfill_targets.empty()) out << " bft=" << pg.backfill_targets; - out << " crt=" << pg.pg_log.get_log().can_rollback_to; + out << " crt=" << pg.pg_log.get_can_rollback_to(); if (pg.last_complete_ondisk != pg.info.last_complete) out << " lcod " << pg.last_complete_ondisk; @@ -7126,9 +7130,8 @@ boost::statechart::result PG::RecoveryState::Stray::react(const MLogRec& logevt) pg->dirty_info = true; pg->dirty_big_info = true; // maybe. - PGLogEntryHandler rollbacker; + PGLogEntryHandler rollbacker{pg, t}; pg->pg_log.reset_backfill_claim_log(msg->log, &rollbacker); - rollbacker.apply(pg, t); pg->pg_log.reset_backfill(); } else { diff --git a/src/osd/PG.h b/src/osd/PG.h index 891fe991c64..ce79c69660f 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -941,92 +941,30 @@ public: bool proc_replica_info( pg_shard_t from, const pg_info_t &info, epoch_t send_epoch); - - struct LogEntryTrimmer : public ObjectModDesc::Visitor { - const hobject_t &soid; - PG *pg; - ObjectStore::Transaction *t; - LogEntryTrimmer(const hobject_t &soid, PG *pg, ObjectStore::Transaction *t) - : soid(soid), pg(pg), t(t) {} - void rmobject(version_t old_version) { - pg->get_pgbackend()->trim_stashed_object( - soid, - old_version, - t); - } - }; - - struct SnapRollBacker : public ObjectModDesc::Visitor { - const hobject_t &soid; + struct PGLogEntryHandler : public PGLog::LogEntryHandler { PG *pg; ObjectStore::Transaction *t; - SnapRollBacker(const hobject_t &soid, PG *pg, ObjectStore::Transaction *t) - : soid(soid), pg(pg), t(t) {} - void update_snaps(set &snaps) { - pg->update_object_snap_mapping(t, soid, snaps); - } - void create() { - pg->clear_object_snap_mapping( - t, - soid); - } - }; + PGLogEntryHandler(PG *pg, ObjectStore::Transaction *t) : pg(pg), t(t) {} - struct PGLogEntryHandler : public PGLog::LogEntryHandler { - mempool::osd::list to_rollback; - set to_remove; - mempool::osd::list to_trim; - list > to_stash; - // LogEntryHandler void remove(const hobject_t &hoid) { - to_remove.insert(hoid); + pg->get_pgbackend()->remove(hoid, t); } void try_stash(const hobject_t &hoid, version_t v) { - to_stash.push_back(make_pair(hoid, v)); + pg->get_pgbackend()->try_stash(hoid, v, t); } void rollback(const pg_log_entry_t &entry) { - to_rollback.push_back(entry); + assert(entry.can_rollback()); + pg->get_pgbackend()->rollback(entry, t); } void rollforward(const pg_log_entry_t &entry) { - to_trim.push_back(entry); + pg->get_pgbackend()->rollforward(entry, t); } void trim(const pg_log_entry_t &entry) { - to_trim.push_back(entry); - } - - void apply(PG *pg, ObjectStore::Transaction *t) { - for (list::iterator j = to_rollback.begin(); - j != to_rollback.end(); - ++j) { - assert(j->mod_desc.can_rollback()); - pg->get_pgbackend()->rollback(j->soid, j->mod_desc, t); - SnapRollBacker rollbacker(j->soid, pg, t); - j->mod_desc.visit(&rollbacker); - } - for (list >::iterator i = to_stash.begin(); - i != to_stash.end(); - ++i) { - pg->get_pgbackend()->try_stash(i->first, i->second, t); - } - for (set::iterator i = to_remove.begin(); - i != to_remove.end(); - ++i) { - pg->get_pgbackend()->rollback_create(*i, t); - pg->remove_snap_mapped_object(*t, *i); - } - for (list::reverse_iterator i = to_trim.rbegin(); - i != to_trim.rend(); - ++i) { - LogEntryTrimmer trimmer(i->soid, pg, t); - i->mod_desc.visit(&trimmer); - } + pg->get_pgbackend()->trim(entry, t); } }; - friend struct SnapRollBacker; - friend struct PGLogEntryHandler; - friend struct LogEntryTrimmer; void update_object_snap_mapping( ObjectStore::Transaction *t, const hobject_t &soid, const set &snaps); diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index c3153d01a1a..65b56c5a010 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -36,60 +36,117 @@ static ostream& _prefix(std::ostream *_dout, PGBackend *pgb) { return *_dout << pgb->get_parent()->gen_dbg_prefix(); } -// -- ObjectModDesc -- -struct RollbackVisitor : public ObjectModDesc::Visitor { - const hobject_t &hoid; +void PGBackend::rollback( + const pg_log_entry_t &entry, + ObjectStore::Transaction *t) +{ + + struct RollbackVisitor : public ObjectModDesc::Visitor { + const hobject_t &hoid; + PGBackend *pg; + ObjectStore::Transaction t; + RollbackVisitor( + const hobject_t &hoid, + PGBackend *pg) : hoid(hoid), pg(pg) {} + void append(uint64_t old_size) override { + ObjectStore::Transaction temp; + pg->rollback_append(hoid, old_size, &temp); + temp.append(t); + temp.swap(t); + } + void setattrs(map > &attrs) override { + ObjectStore::Transaction temp; + pg->rollback_setattrs(hoid, attrs, &temp); + temp.append(t); + temp.swap(t); + } + void rmobject(version_t old_version) override { + ObjectStore::Transaction temp; + pg->rollback_stash(hoid, old_version, &temp); + temp.append(t); + temp.swap(t); + } + void try_rmobject(version_t old_version) override { + ObjectStore::Transaction temp; + pg->rollback_try_stash(hoid, old_version, &temp); + temp.append(t); + temp.swap(t); + } + void create() override { + ObjectStore::Transaction temp; + pg->rollback_create(hoid, &temp); + temp.append(t); + temp.swap(t); + } + void update_snaps(const set &snaps) override { + ObjectStore::Transaction temp; + pg->get_parent()->pgb_set_object_snap_mapping(hoid, snaps, &temp); + temp.append(t); + temp.swap(t); + } + void rollback_extents( + version_t gen, + const vector > &extents) override { + ObjectStore::Transaction temp; + pg->rollback_extents(gen, extents, hoid, &temp); + temp.append(t); + temp.swap(t); + } + }; + + assert(entry.mod_desc.can_rollback()); + RollbackVisitor vis(entry.soid, this); + entry.mod_desc.visit(&vis); + t->append(vis.t); +} + +struct Trimmer : public ObjectModDesc::Visitor { + const hobject_t &soid; PGBackend *pg; - ObjectStore::Transaction t; - RollbackVisitor( - const hobject_t &hoid, - PGBackend *pg) : hoid(hoid), pg(pg) {} - void append(uint64_t old_size) { - ObjectStore::Transaction temp; - pg->rollback_append(hoid, old_size, &temp); - temp.append(t); - temp.swap(t); - } - void setattrs(map > &attrs) { - ObjectStore::Transaction temp; - pg->rollback_setattrs(hoid, attrs, &temp); - temp.append(t); - temp.swap(t); - } + ObjectStore::Transaction *t; + Trimmer( + const hobject_t &soid, + PGBackend *pg, + ObjectStore::Transaction *t) + : soid(soid), pg(pg), t(t) {} void rmobject(version_t old_version) { - ObjectStore::Transaction temp; - pg->rollback_stash(hoid, old_version, &temp); - temp.append(t); - temp.swap(t); - } - void try_rmobject(version_t old_version) { - ObjectStore::Transaction temp; - pg->rollback_try_stash(hoid, old_version, &temp); - temp.append(t); - temp.swap(t); - } - void create() { - ObjectStore::Transaction temp; - pg->rollback_create(hoid, &temp); - temp.append(t); - temp.swap(t); + pg->trim_rollback_object( + soid, + old_version, + t); } - void update_snaps(set &snaps) { - // pass + // try_rmobject defaults to rmobject + void rollback_extents( + version_t gen, + const vector > &extents) override { + pg->trim_rollback_object( + soid, + gen, + t); } }; -void PGBackend::rollback( - const hobject_t &hoid, - const ObjectModDesc &desc, +void PGBackend::rollforward( + const pg_log_entry_t &entry, ObjectStore::Transaction *t) { - assert(desc.can_rollback()); - RollbackVisitor vis(hoid, this); - desc.visit(&vis); - t->append(vis.t); + auto dpp = get_parent()->get_dpp(); + ldpp_dout(dpp, 20) << __func__ << ": entry=" << entry << dendl; + if (!entry.can_rollback()) + return; + Trimmer trimmer(entry.soid, this, t); + entry.mod_desc.visit(&trimmer); } +void PGBackend::trim( + const pg_log_entry_t &entry, + ObjectStore::Transaction *t) +{ + if (!entry.can_rollback()) + return; + Trimmer trimmer(entry.soid, this, t); + entry.mod_desc.visit(&trimmer); +} void PGBackend::try_stash( const hobject_t &hoid, @@ -102,6 +159,16 @@ void PGBackend::try_stash( ghobject_t(hoid, v, get_parent()->whoami_shard().shard)); } +void PGBackend::remove( + const hobject_t &hoid, + ObjectStore::Transaction *t) { + assert(!hoid.is_temp()); + t->remove( + coll, + ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); + get_parent()->pgb_clear_object_snap_mapping(hoid, t); +} + void PGBackend::on_change_cleanup(ObjectStore::Transaction *t) { dout(10) << __func__ << dendl; @@ -293,16 +360,27 @@ void PGBackend::rollback_try_stash( ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); } -void PGBackend::rollback_create( +void PGBackend::rollback_extents( + version_t gen, + const vector > &extents, const hobject_t &hoid, ObjectStore::Transaction *t) { - assert(!hoid.is_temp()); + auto shard = get_parent()->whoami_shard().shard; + for (auto &&extent: extents) { + t->clone_range( + coll, + ghobject_t(hoid, gen, shard), + ghobject_t(hoid, ghobject_t::NO_GEN, shard), + extent.first, + extent.second, + extent.first); + } t->remove( coll, - ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); + ghobject_t(hoid, gen, shard)); } -void PGBackend::trim_stashed_object( +void PGBackend::trim_rollback_object( const hobject_t &hoid, version_t old_version, ObjectStore::Transaction *t) { diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 7f7a4481a2c..c92d44fb855 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -196,6 +196,15 @@ typedef ceph::shared_ptr OSDMapRef; bool transaction_applied, ObjectStore::Transaction &t) = 0; + virtual void pgb_set_object_snap_mapping( + const hobject_t &soid, + const set &snaps, + ObjectStore::Transaction *t) = 0; + + virtual void pgb_clear_object_snap_mapping( + const hobject_t &soid, + ObjectStore::Transaction *t) = 0; + virtual void update_peer_last_complete_ondisk( pg_shard_t fromosd, eversion_t lcod) = 0; @@ -393,10 +402,23 @@ typedef ceph::shared_ptr OSDMapRef; ObjectStore::Transaction *t); void rollback( + const pg_log_entry_t &entry, + ObjectStore::Transaction *t); + + friend class LRBTrimmer; + void rollforward( + const pg_log_entry_t &entry, + ObjectStore::Transaction *t); + + void trim( + const pg_log_entry_t &entry, + ObjectStore::Transaction *t); + + void remove( const hobject_t &hoid, - const ObjectModDesc &desc, ObjectStore::Transaction *t); + protected: /// Reapply old attributes void rollback_setattrs( const hobject_t &hoid, @@ -423,13 +445,23 @@ typedef ceph::shared_ptr OSDMapRef; /// Delete object to rollback create void rollback_create( + const hobject_t &hoid, + ObjectStore::Transaction *t) { + remove(hoid, t); + } + + /// Clone the extents back into place + void rollback_extents( + version_t gen, + const vector > &extents, const hobject_t &hoid, ObjectStore::Transaction *t); + public: - /// Trim object stashed at stashed_version - void trim_stashed_object( + /// Trim object stashed at version + void trim_rollback_object( const hobject_t &hoid, - version_t stashed_version, + version_t gen, ObjectStore::Transaction *t); /// List objects in collection diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index b0fc2e25e2b..ea2a85000d5 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -30,49 +30,17 @@ static ostream& _prefix(std::ostream *_dout, const PGLog *pglog) //////////////////// PGLog::IndexedLog //////////////////// -void PGLog::IndexedLog::filter_log(spg_t pgid, const OSDMap &map, const string &hit_set_namespace) -{ - IndexedLog out; - pg_log_t reject; - - pg_log_t::filter_log(pgid, map, hit_set_namespace, *this, out, reject); - - *this = out; - index(); -} - -void PGLog::IndexedLog::split_into( +PGLog::IndexedLog PGLog::IndexedLog::split_out_child( pg_t child_pgid, - unsigned split_bits, - PGLog::IndexedLog *olog) + unsigned split_bits) { - mempool::osd::list oldlog; - oldlog.swap(log); - - eversion_t old_tail; - olog->head = head; - olog->tail = tail; - unsigned mask = ~((~0)<::iterator i = oldlog.begin(); - i != oldlog.end(); - ) { - if ((i->soid.get_hash() & mask) == child_pgid.m_seed) { - olog->log.push_back(*i); - } else { - log.push_back(*i); - } - oldlog.erase(i++); - } - - - olog->can_rollback_to = can_rollback_to; - - olog->index(); + IndexedLog ret(pg_log_t::split_out_child(child_pgid, split_bits)); index(); + reset_rollback_info_trimmed_to_riter(); + return ret; } void PGLog::IndexedLog::trim( - LogEntryHandler *handler, eversion_t s, set *trimmed) { @@ -83,9 +51,7 @@ void PGLog::IndexedLog::trim( << " on " << *this << dendl; } - if (s > can_rollback_to) - can_rollback_to = s; - trim_rollback_info_to(s, handler); + assert(s <= can_rollback_to); while (!log.empty()) { pg_log_entry_t &e = *log.begin(); @@ -145,7 +111,6 @@ void PGLog::clear_info_log( } void PGLog::trim( - LogEntryHandler *handler, eversion_t trim_to, pg_info_t &info) { @@ -155,7 +120,7 @@ void PGLog::trim( assert(trim_to <= info.last_complete); dout(10) << "trim " << log << " to " << trim_to << dendl; - log.trim(handler, trim_to, &trimmed); + log.trim(trim_to, &trimmed); info.log_tail = log.tail; } } @@ -223,38 +188,13 @@ void PGLog::proc_replica_log( log.tail : first_non_divergent->version; - mempool::osd::list divergent; - list::const_iterator pp = olog.log.end(); - while (true) { - if (pp == olog.log.begin()) - break; - - --pp; - const pg_log_entry_t& oe = *pp; - - // don't continue past the tail of our log. - if (oe.version <= log.tail) { - ++pp; - break; - } - - if (oe.version <= lu) { - ++pp; - break; - } - - divergent.push_front(oe); - } - - - IndexedLog folog; - folog.log.insert(folog.log.begin(), olog.log.begin(), pp); - folog.index(); + IndexedLog folog(olog); + auto divergent = folog.rewind_from_head(lu); _merge_divergent_entries( folog, divergent, oinfo, - olog.can_rollback_to, + olog.get_can_rollback_to(), omissing, 0, this); @@ -296,49 +236,29 @@ void PGLog::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead bool &dirty_info, bool &dirty_big_info) { dout(10) << "rewind_divergent_log truncate divergent future " << newhead << dendl; - assert(newhead >= log.tail); - - list::iterator p = log.log.end(); - mempool::osd::list divergent; - while (true) { - if (p == log.log.begin()) { - // yikes, the whole thing is divergent! - divergent.swap(log.log); - break; - } - --p; - mark_dirty_from(p->version); - if (p->version <= newhead) { - ++p; - divergent.splice(divergent.begin(), log.log, p, log.log.end()); - break; - } - assert(p->version > newhead); - dout(10) << "rewind_divergent_log future divergent " << *p << dendl; - } - log.head = newhead; - info.last_update = newhead; + if (info.last_complete > newhead) info.last_complete = newhead; - if (log.rollback_info_trimmed_to > newhead) - log.rollback_info_trimmed_to = newhead; - - log.index(); + auto divergent = log.rewind_from_head(newhead); + if (!divergent.empty()) { + mark_dirty_from(divergent.front().version); + } + for (auto &&entry: divergent) { + dout(10) << "rewind_divergent_log future divergent " << entry << dendl; + } + info.last_update = newhead; _merge_divergent_entries( log, divergent, info, - log.can_rollback_to, + log.get_can_rollback_to(), missing, rollbacker, this); - if (info.last_update < log.can_rollback_to) - log.can_rollback_to = info.last_update; - dirty_info = true; dirty_big_info = true; } @@ -431,53 +351,41 @@ void PGLog::merge_log(ObjectStore::Transaction& t, } mark_dirty_from(lower_bound); + auto divergent = log.rewind_from_head(lower_bound); // move aside divergent items - mempool::osd::list divergent; - while (!log.empty()) { - pg_log_entry_t &oe = *log.log.rbegin(); - /* - * look at eversion.version here. we want to avoid a situation like: - * our log: 100'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529 - * new log: 122'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529 - * lower_bound = 100'9 - * i.e, same request, different version. If the eversion.version is > the - * lower_bound, we it is divergent. - */ - if (oe.version.version <= lower_bound.version) - break; + for (auto &&oe: divergent) { dout(10) << "merge_log divergent " << oe << dendl; - divergent.push_front(oe); - log.log.pop_back(); } + log.roll_forward_to(log.head, rollbacker); - mempool::osd::list entries; - entries.splice(entries.end(), olog.log, from, to); + mempool::osd::list new_entries; + new_entries.splice(new_entries.end(), olog.log, from, to); append_log_entries_update_missing( info.last_backfill, info.last_backfill_bitwise, - entries, + new_entries, + false, &log, missing, rollbacker, this); - log.index(); - - info.last_update = log.head = olog.head; - - info.last_user_version = oinfo.last_user_version; - info.purged_snaps = oinfo.purged_snaps; _merge_divergent_entries( log, divergent, info, - log.can_rollback_to, + log.get_can_rollback_to(), missing, rollbacker, this); + info.last_update = log.head = olog.head; + // We cannot rollback into the new log entries - log.can_rollback_to = log.head; + log.skip_can_rollback_to_to_head(); + + info.last_user_version = oinfo.last_user_version; + info.purged_snaps = oinfo.purged_snaps; changed = true; } @@ -659,8 +567,12 @@ void PGLog::_write_log_and_missing_wo_missing( ::encode(divergent_priors, (*km)["divergent_priors"]); } if (require_rollback) { - ::encode(log.can_rollback_to, (*km)["can_rollback_to"]); - ::encode(log.rollback_info_trimmed_to, (*km)["rollback_info_trimmed_to"]); + ::encode( + log.get_can_rollback_to(), + (*km)["can_rollback_to"]); + ::encode( + log.get_rollback_info_trimmed_to(), + (*km)["rollback_info_trimmed_to"]); } if (!to_remove.empty()) @@ -753,8 +665,12 @@ void PGLog::_write_log_and_missing( } }); if (require_rollback) { - ::encode(log.can_rollback_to, (*km)["can_rollback_to"]); - ::encode(log.rollback_info_trimmed_to, (*km)["rollback_info_trimmed_to"]); + ::encode( + log.get_can_rollback_to(), + (*km)["can_rollback_to"]); + ::encode( + log.get_rollback_info_trimmed_to(), + (*km)["rollback_info_trimmed_to"]); } if (!to_remove.empty()) diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 864866f98f0..786dda6a29b 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -50,13 +50,13 @@ struct PGLog : DoutPrefixProvider { const pg_log_entry_t &entry) = 0; virtual void rollforward( const pg_log_entry_t &entry) = 0; + virtual void trim( + const pg_log_entry_t &entry) = 0; virtual void remove( const hobject_t &hoid) = 0; virtual void try_stash( const hobject_t &hoid, version_t v) = 0; - virtual void trim( - const pg_log_entry_t &entry) = 0; virtual ~LogEntryHandler() {} }; @@ -73,6 +73,7 @@ struct PGLog : DoutPrefixProvider { char buf[512]; }; +public: /** * IndexLog - adds in-memory index of the log, by oid. * plus some methods to manipulate it all. @@ -83,12 +84,12 @@ struct PGLog : DoutPrefixProvider { mutable ceph::unordered_multimap extra_caller_ops; // recovery pointers - list::iterator complete_to; // not inclusive of referenced item - version_t last_requested; // last object requested by primary + list::iterator complete_to; // not inclusive of referenced item + version_t last_requested = 0; // last object requested by primary // private: - mutable __u16 indexed_data; + mutable __u16 indexed_data = 0; /** * rollback_info_trimmed_to_riter points to the first log entry <= * rollback_info_trimmed_to @@ -96,11 +97,13 @@ struct PGLog : DoutPrefixProvider { * It's a reverse_iterator because rend() is a natural representation for * tail, and rbegin() works nicely for head. */ - list::reverse_iterator rollback_info_trimmed_to_riter; - public: + mempool::osd::list::reverse_iterator + rollback_info_trimmed_to_riter; + template void advance_can_rollback_to(eversion_t to, F &&f) { - assert(to <= can_rollback_to); + if (to > can_rollback_to) + can_rollback_to = to; if (to > rollback_info_trimmed_to) rollback_info_trimmed_to = to; @@ -114,6 +117,49 @@ struct PGLog : DoutPrefixProvider { f(*rollback_info_trimmed_to_riter); } } + + void reset_rollback_info_trimmed_to_riter() { + rollback_info_trimmed_to_riter = log.rbegin(); + while (rollback_info_trimmed_to_riter != log.rend() && + rollback_info_trimmed_to_riter->version > rollback_info_trimmed_to) + ++rollback_info_trimmed_to_riter; + } + + // indexes objects, caller ops and extra caller ops + public: + IndexedLog() : + complete_to(log.end()), + last_requested(0), + indexed_data(0), + rollback_info_trimmed_to_riter(log.rbegin()) + {} + + template + IndexedLog(Args&&... args) : + pg_log_t(std::forward(args)...), + complete_to(log.end()), + last_requested(0), + indexed_data(0), + rollback_info_trimmed_to_riter(log.rbegin()) { + reset_rollback_info_trimmed_to_riter(); + index(); + } + + IndexedLog(const IndexedLog &rhs) : + pg_log_t(rhs), + complete_to(log.end()), + last_requested(rhs.last_requested), + indexed_data(0), + rollback_info_trimmed_to_riter(log.rbegin()) { + reset_rollback_info_trimmed_to_riter(); + index(rhs.indexed_data); + } + IndexedLog &operator=(const IndexedLog &rhs) { + this->~IndexedLog(); + new (this) IndexedLog(rhs); + return *this; + } + void trim_rollback_info_to(eversion_t to, LogEntryHandler *h) { advance_can_rollback_to( to, @@ -129,30 +175,32 @@ struct PGLog : DoutPrefixProvider { }); } - /****/ - IndexedLog() : - complete_to(log.end()), - last_requested(0), - indexed_data(0), - rollback_info_trimmed_to_riter(log.rbegin()) - {} + void skip_can_rollback_to_to_head() { + advance_can_rollback_to(head, [&](const pg_log_entry_t &entry) {}); + } + mempool::osd::list rewind_from_head(eversion_t newhead) { + auto divergent = pg_log_t::rewind_from_head(newhead); + index(); + reset_rollback_info_trimmed_to_riter(); + return divergent; + } + + /****/ void claim_log_and_clear_rollback_info(const pg_log_t& o) { // we must have already trimmed the old entries assert(rollback_info_trimmed_to == head); assert(rollback_info_trimmed_to_riter == log.rbegin()); - log = o.log; - head = o.head; - rollback_info_trimmed_to = head; - tail = o.tail; + *this = IndexedLog(o); + + skip_can_rollback_to_to_head(); index(); } - void split_into( + IndexedLog split_out_child( pg_t child_pgid, - unsigned split_bits, - IndexedLog *olog); + unsigned split_bits); void zero() { // we must have already trimmed the old entries @@ -165,8 +213,7 @@ struct PGLog : DoutPrefixProvider { reset_recovery_pointers(); } void clear() { - rollback_info_trimmed_to = head; - rollback_info_trimmed_to_riter = log.rbegin(); + skip_can_rollback_to_to_head(); zero(); } void reset_recovery_pointers() { @@ -264,85 +311,53 @@ struct PGLog : DoutPrefixProvider { } } - void reset_rollback_info_trimmed_to_riter() { - rollback_info_trimmed_to_riter = log.rbegin(); - while (rollback_info_trimmed_to_riter != log.rend() && - rollback_info_trimmed_to_riter->version > rollback_info_trimmed_to) - ++rollback_info_trimmed_to_riter; - } + void index(__u16 to_index = PGLOG_INDEXED_ALL) const { + if (to_index & PGLOG_INDEXED_OBJECTS) + objects.clear(); + if (to_index & PGLOG_INDEXED_CALLER_OPS) + caller_ops.clear(); + if (to_index & PGLOG_INDEXED_EXTRA_CALLER_OPS) + extra_caller_ops.clear(); - // indexes objects, caller ops and extra caller ops - void index() { - objects.clear(); - caller_ops.clear(); - extra_caller_ops.clear(); - for (list::iterator i = log.begin(); - i != log.end(); - ++i) { - if (i->object_is_indexed()) { - objects[i->soid] = &(*i); + for (list::const_iterator i = log.begin(); + i != log.end(); + ++i) { + if (to_index & PGLOG_INDEXED_OBJECTS) { + if (i->object_is_indexed()) { + objects[i->soid] = const_cast(&(*i)); + } } - if (i->reqid_is_indexed()) { - //assert(caller_ops.count(i->reqid) == 0); // divergent merge_log indexes new before unindexing old - caller_ops[i->reqid] = &(*i); - } + if (to_index & PGLOG_INDEXED_CALLER_OPS) { + if (i->reqid_is_indexed()) { + caller_ops[i->reqid] = const_cast(&(*i)); + } + } - for (vector >::const_iterator j = - i->extra_reqids.begin(); - j != i->extra_reqids.end(); - ++j) { - extra_caller_ops.insert(make_pair(j->first, &(*i))); - } + if (to_index & PGLOG_INDEXED_EXTRA_CALLER_OPS) { + for (vector >::const_iterator j = + i->extra_reqids.begin(); + j != i->extra_reqids.end(); + ++j) { + extra_caller_ops.insert( + make_pair(j->first, const_cast(&(*i)))); + } + } } - indexed_data = PGLOG_INDEXED_ALL; - reset_rollback_info_trimmed_to_riter(); + indexed_data |= to_index; } void index_objects() const { - objects.clear(); - for (list::const_iterator i = log.begin(); - i != log.end(); - ++i) { - if (i->object_is_indexed()) { - objects[i->soid] = const_cast(&(*i)); - } - } - - indexed_data |= PGLOG_INDEXED_OBJECTS; + index(PGLOG_INDEXED_OBJECTS); } void index_caller_ops() const { - caller_ops.clear(); - for (list::const_iterator i = log.begin(); - i != log.end(); - ++i) { - - if (i->reqid_is_indexed()) { - //assert(caller_ops.count(i->reqid) == 0); // divergent merge_log indexes new before unindexing old - caller_ops[i->reqid] = const_cast(&(*i)); - } - } - - indexed_data |= PGLOG_INDEXED_CALLER_OPS; + index(PGLOG_INDEXED_CALLER_OPS); } void index_extra_caller_ops() const { - extra_caller_ops.clear(); - for (list::const_iterator i = log.begin(); - i != log.end(); - ++i) { - - for (vector >::const_iterator j = - i->extra_reqids.begin(); - j != i->extra_reqids.end(); - ++j) { - extra_caller_ops.insert(make_pair(j->first, const_cast(&(*i)))); - } - } - - indexed_data |= PGLOG_INDEXED_EXTRA_CALLER_OPS; + index(PGLOG_INDEXED_EXTRA_CALLER_OPS); } void index(pg_log_entry_t& e) { @@ -352,17 +367,17 @@ struct PGLog : DoutPrefixProvider { objects[e.soid] = &e; } if (indexed_data & PGLOG_INDEXED_CALLER_OPS) { + // divergent merge_log indexes new before unindexing old if (e.reqid_is_indexed()) { - //assert(caller_ops.count(i->reqid) == 0); // divergent merge_log indexes new before unindexing old - caller_ops[e.reqid] = &e; + caller_ops[e.reqid] = &e; } } if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) { for (vector >::const_iterator j = - e.extra_reqids.begin(); - j != e.extra_reqids.end(); - ++j) { - extra_caller_ops.insert(make_pair(j->first, &e)); + e.extra_reqids.begin(); + j != e.extra_reqids.end(); + ++j) { + extra_caller_ops.insert(make_pair(j->first, &e)); } } } @@ -380,18 +395,18 @@ struct PGLog : DoutPrefixProvider { } if (e.reqid_is_indexed()) { if (indexed_data & PGLOG_INDEXED_CALLER_OPS) { - if (caller_ops.count(e.reqid) && // divergent merge_log indexes new before unindexing old - caller_ops[e.reqid] == &e) + // divergent merge_log indexes new before unindexing old + if (caller_ops.count(e.reqid) && caller_ops[e.reqid] == &e) caller_ops.erase(e.reqid); } } if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) { for (vector >::const_iterator j = - e.extra_reqids.begin(); + e.extra_reqids.begin(); j != e.extra_reqids.end(); ++j) { for (ceph::unordered_multimap::iterator k = - extra_caller_ops.find(j->first); + extra_caller_ops.find(j->first); k != extra_caller_ops.end() && k->first == j->first; ++k) { if (k->second == &e) { @@ -408,12 +423,6 @@ struct PGLog : DoutPrefixProvider { // add to log log.push_back(e); - /** - * Make sure we don't keep around more than we need to in the - * in-memory log - */ - log.back().mod_desc.trim_bl(); - // riter previously pointed to the previous entry if (rollback_info_trimmed_to_riter == log.rbegin()) ++rollback_info_trimmed_to_riter; @@ -428,28 +437,25 @@ struct PGLog : DoutPrefixProvider { } if (indexed_data & PGLOG_INDEXED_CALLER_OPS) { if (e.reqid_is_indexed()) { - caller_ops[e.reqid] = &(log.back()); + caller_ops[e.reqid] = &(log.back()); } } if (indexed_data & PGLOG_INDEXED_EXTRA_CALLER_OPS) { for (vector >::const_iterator j = - e.extra_reqids.begin(); - j != e.extra_reqids.end(); - ++j) { - extra_caller_ops.insert(make_pair(j->first, &(log.back()))); + e.extra_reqids.begin(); + j != e.extra_reqids.end(); + ++j) { + extra_caller_ops.insert(make_pair(j->first, &(log.back()))); } } } void trim( - LogEntryHandler *handler, eversion_t s, set *trimmed); ostream& print(ostream& out) const; - - void filter_log(spg_t pgid, const OSDMap &map, const string &hit_set_namespace); }; @@ -583,9 +589,9 @@ public: void unindex() { log.unindex(); } - void add(const pg_log_entry_t& e) { + void add(const pg_log_entry_t& e, bool applied = true) { mark_writeout_from(e.version); - log.add(e); + log.add(e, applied); } void reset_recovery_pointers() { log.reset_recovery_pointers(); } @@ -595,22 +601,19 @@ public: ObjectStore::Transaction *t); void trim( - LogEntryHandler *handler, eversion_t trim_to, pg_info_t &info); void roll_forward_to( eversion_t roll_forward_to, LogEntryHandler *h) { - if (roll_forward_to > log.can_rollback_to) - log.can_rollback_to = roll_forward_to; log.roll_forward_to( roll_forward_to, h); } - eversion_t get_rollback_trimmed_to() const { - return log.rollback_info_trimmed_to; + eversion_t get_can_rollback_to() const { + return log.get_can_rollback_to(); } void roll_forward(LogEntryHandler *h) { @@ -622,7 +625,6 @@ public: //////////////////// get or set log & missing //////////////////// void reset_backfill_claim_log(const pg_log_t &o, LogEntryHandler *h) { - log.can_rollback_to = log.head; log.trim_rollback_info_to(log.head, h); log.claim_log_and_clear_rollback_info(o); missing.clear(); @@ -633,7 +635,7 @@ public: pg_t child_pgid, unsigned split_bits, PGLog *opg_log) { - log.split_into(child_pgid, split_bits, &(opg_log->log)); + opg_log->log = log.split_out_child(child_pgid, split_bits); missing.split_into(child_pgid, split_bits, &(opg_log->missing)); opg_log->mark_dirty_to(eversion_t::max()); mark_dirty_to(eversion_t::max()); @@ -659,8 +661,7 @@ public: } } - if (log.can_rollback_to < v) - log.can_rollback_to = v; + assert(log.get_can_rollback_to() >= v); } void activate_not_complete(pg_info_t &info) { @@ -749,9 +750,6 @@ protected: assert(i->prior_version == last); } last = i->version; - - if (rollbacker) - rollbacker->trim(*i); } const eversion_t prior_version = entries.begin()->prior_version; @@ -771,11 +769,11 @@ protected: if (objiter != log.objects.end() && objiter->second->version >= first_divergent_update) { /// Case 1) - assert(objiter->second->version > last_divergent_update); - ldpp_dout(dpp, 10) << __func__ << ": more recent entry found: " << *objiter->second << ", already merged" << dendl; + assert(objiter->second->version > last_divergent_update); + // ensure missing has been updated appropriately if (objiter->second->is_update()) { assert(missing.is_missing(hoid) && @@ -784,8 +782,14 @@ protected: assert(!missing.is_missing(hoid)); } missing.revise_have(hoid, eversion_t()); - if (rollbacker && !object_not_in_store) - rollbacker->remove(hoid); + if (rollbacker) { + if (!object_not_in_store) { + rollbacker->remove(hoid); + } + for (auto &&i: entries) { + rollbacker->trim(i); + } + } return; } @@ -799,8 +803,14 @@ protected: << dendl; if (missing.is_missing(hoid)) missing.rm(missing.get_items().find(hoid)); - if (rollbacker && !object_not_in_store) - rollbacker->remove(hoid); + if (rollbacker) { + if (!object_not_in_store) { + rollbacker->remove(hoid); + } + for (auto &&i: entries) { + rollbacker->trim(i); + } + } return; } @@ -827,6 +837,11 @@ protected: << info.log_tail << dendl; } } + if (rollbacker) { + for (auto &&i: entries) { + rollbacker->trim(i); + } + } return; } @@ -839,7 +854,7 @@ protected: for (list::const_reverse_iterator i = entries.rbegin(); i != entries.rend(); ++i) { - if (!i->mod_desc.can_rollback() || i->version <= olog_can_rollback_to) { + if (!i->can_rollback() || i->version <= olog_can_rollback_to) { ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid << " cannot rollback " << *i << dendl; can_rollback = false; @@ -852,7 +867,7 @@ protected: for (list::const_reverse_iterator i = entries.rbegin(); i != entries.rend(); ++i) { - assert(i->mod_desc.can_rollback() && i->version > olog_can_rollback_to); + assert(i->can_rollback() && i->version > olog_can_rollback_to); ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid << " rolling back " << *i << dendl; if (rollbacker) @@ -865,8 +880,13 @@ protected: /// Case 5) ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid << " cannot roll back, " << "removing and adding to missing" << dendl; - if (rollbacker && !object_not_in_store) - rollbacker->remove(hoid); + if (rollbacker) { + if (!object_not_in_store) + rollbacker->remove(hoid); + for (auto &&i: entries) { + rollbacker->trim(i); + } + } missing.add(hoid, prior_version, eversion_t()); if (prior_version <= info.log_tail) { ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid @@ -921,7 +941,7 @@ protected: oe.soid, entries, info, - log.can_rollback_to, + log.get_can_rollback_to(), missing, rollbacker, this); @@ -941,6 +961,7 @@ public: const hobject_t &last_backfill, bool last_backfill_bitwise, const mempool::osd::list &entries, + bool maintain_rollback, IndexedLog *log, missing_type &missing, LogEntryHandler *rollbacker, @@ -948,24 +969,21 @@ public: bool invalidate_stats = false; if (log && !entries.empty()) { assert(log->head < entries.begin()->version); - log->head = entries.rbegin()->version; } for (list::const_iterator p = entries.begin(); p != entries.end(); ++p) { invalidate_stats = invalidate_stats || !p->is_error(); if (log) { - log->log.push_back(*p); - pg_log_entry_t &ne = log->log.back(); - ldpp_dout(dpp, 20) << "update missing, append " << ne << dendl; - log->index(ne); + ldpp_dout(dpp, 20) << "update missing, append " << *p << dendl; + log->add(*p); } if (cmp(p->soid, last_backfill, last_backfill_bitwise) <= 0 && !p->is_error()) { missing.add_next_event(*p); if (rollbacker) { // hack to match PG::mark_all_unfound_lost - if (p->is_lost_delete() && p->mod_desc.can_rollback()) { + if (maintain_rollback && p->is_lost_delete() && p->can_rollback()) { rollbacker->try_stash(p->soid, p->version.version); } else if (p->is_delete()) { rollbacker->remove(p->soid); @@ -973,8 +991,6 @@ public: } } } - if (log) - log->reset_rollback_info_trimmed_to_riter(); return invalidate_stats; } bool append_new_log_entries( @@ -986,6 +1002,7 @@ public: last_backfill, last_backfill_bitwise, entries, + true, &log, missing, rollbacker, @@ -1087,13 +1104,13 @@ public: assert(r == 0); assert(st.st_size == 0); - log.tail = info.log_tail; // will get overridden below if it had been recorded - log.can_rollback_to = info.last_update; - log.rollback_info_trimmed_to = eversion_t(); + eversion_t on_disk_can_rollback_to = info.last_update; + eversion_t on_disk_rollback_info_trimmed_to = eversion_t(); ObjectMap::ObjectMapIterator p = store->get_omap_iterator(log_coll, log_oid); map divergent_priors; bool has_divergent_priors = false; + list entries; if (p) { for (p->seek_to_first(); p->valid() ; p->next(false)) { // non-log pgmeta_oid keys are prefixed with _; skip those @@ -1108,9 +1125,9 @@ public: has_divergent_priors = true; debug_verify_stored_missing = false; } else if (p->key() == "can_rollback_to") { - ::decode(log.can_rollback_to, bp); + ::decode(on_disk_can_rollback_to, bp); } else if (p->key() == "rollback_info_trimmed_to") { - ::decode(log.rollback_info_trimmed_to, bp); + ::decode(on_disk_rollback_info_trimmed_to, bp); } else if (p->key().substr(0, 7) == string("missing")) { pair p; ::decode(p, bp); @@ -1119,20 +1136,23 @@ public: pg_log_entry_t e; e.decode_with_checksum(bp); ldpp_dout(dpp, 20) << "read_log_and_missing " << e << dendl; - if (!log.log.empty()) { - pg_log_entry_t last_e(log.log.back()); + if (!entries.empty()) { + pg_log_entry_t last_e(entries.back()); assert(last_e.version.version < e.version.version); assert(last_e.version.epoch <= e.version.epoch); } - log.log.push_back(e); - log.head = e.version; + entries.push_back(e); if (log_keys_debug) log_keys_debug->insert(e.get_key_name()); } } } - log.head = info.last_update; - log.reset_rollback_info_trimmed_to_riter(); + log = IndexedLog( + info.last_update, + info.log_tail, + on_disk_can_rollback_to, + on_disk_rollback_info_trimmed_to, + std::move(entries)); if (has_divergent_priors || debug_verify_stored_missing) { // build missing @@ -1155,9 +1175,6 @@ public: if (did.count(i->soid)) continue; did.insert(i->soid); - if (i->version > log.can_rollback_to && i->is_rollforward()) - checked.insert(i->soid); - if (i->is_delete()) continue; bufferlist bv; diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index a34e1401dcd..795e34dae3a 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -376,6 +376,7 @@ public: void generate_transaction( PGTransactionUPtr &pgt, const coll_t &coll, + bool legacy_log_entries, vector &log_entries, ObjectStore::Transaction *t, set *added, @@ -386,7 +387,7 @@ void generate_transaction( assert(removed); for (auto &&le: log_entries) { - le.mod_desc.mark_unrollbackable(); + le.mark_unrollbackable(); auto oiter = pgt->op_map.find(le.soid); if (oiter != pgt->op_map.end() && oiter->second.updated_snaps) { vector snaps( @@ -540,6 +541,7 @@ void ReplicatedBackend::submit_transaction( generate_transaction( t, coll, + !get_osdmap()->test_flag(CEPH_OSDMAP_REQUIRE_KRAKEN), log_entries, &op_t, &added, diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index a72cc797bfc..5e1a4a9e10e 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -9834,7 +9834,7 @@ void ReplicatedPG::mark_all_unfound_lost( pg_log_entry_t::LOST_REVERT, oid, v, m->second.need, 0, osd_reqid_t(), mtime, 0); e.reverting_to = prev; - e.mod_desc.mark_unrollbackable(); + e.mark_unrollbackable(); log_entries.push_back(e); dout(10) << e << dendl; @@ -9852,7 +9852,7 @@ void ReplicatedPG::mark_all_unfound_lost( if (pool.info.require_rollback()) { e.mod_desc.try_rmobject(v.version); } else { - e.mod_desc.mark_unrollbackable(); + e.mark_unrollbackable(); } } // otherwise, just do what we used to do dout(10) << e << dendl; @@ -9995,9 +9995,8 @@ void ReplicatedPG::on_removal(ObjectStore::Transaction *t) // clear log - PGLogEntryHandler rollbacker; + PGLogEntryHandler rollbacker{this, t}; pg_log.roll_forward(&rollbacker); - rollbacker.apply(this, t); write_if_dirty(*t); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index ad3ece3a74a..7b99a78773e 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -337,6 +337,18 @@ public: map &attrs) override { return get_object_context(hoid, true, &attrs); } + void pgb_set_object_snap_mapping( + const hobject_t &soid, + const set &snaps, + ObjectStore::Transaction *t) override { + return update_object_snap_mapping(t, soid, snaps); + } + void pgb_clear_object_snap_mapping( + const hobject_t &soid, + ObjectStore::Transaction *t) override { + return clear_object_snap_mapping(t, soid); + } + void log_operation( const vector &logv, diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 431dc05478f..3c3ab2fff6c 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2900,6 +2900,7 @@ struct pg_log_t { eversion_t head; // newest entry eversion_t tail; // version prior to oldest +protected: // We can rollback rollback-able entries > can_rollback_to eversion_t can_rollback_to; @@ -2907,16 +2908,107 @@ struct pg_log_t { // data can be found eversion_t rollback_info_trimmed_to; +public: mempool::osd::list log; // the actual log. - pg_log_t() {} + pg_log_t() = default; + pg_log_t(const eversion_t &last_update, + const eversion_t &log_tail, + const eversion_t &can_rollback_to, + const eversion_t &rollback_info_trimmed_to, + mempool::osd::list &&entries) + : head(last_update), tail(log_tail), can_rollback_to(can_rollback_to), + rollback_info_trimmed_to(rollback_info_trimmed_to), + log(std::move(entries)) {} + pg_log_t(const eversion_t &last_update, + const eversion_t &log_tail, + const eversion_t &can_rollback_to, + const eversion_t &rollback_info_trimmed_to, + const std::list &entries) + : head(last_update), tail(log_tail), can_rollback_to(can_rollback_to), + rollback_info_trimmed_to(rollback_info_trimmed_to) { + for (auto &&entry: entries) { + log.push_back(entry); + } + } void clear() { eversion_t z; - can_rollback_to = head = tail = z; + rollback_info_trimmed_to = can_rollback_to = head = tail = z; log.clear(); } + eversion_t get_rollback_info_trimmed_to() const { + return rollback_info_trimmed_to; + } + eversion_t get_can_rollback_to() const { + return can_rollback_to; + } + + + pg_log_t split_out_child(pg_t child_pgid, unsigned split_bits) { + mempool::osd::list oldlog, childlog; + oldlog.swap(log); + + eversion_t old_tail; + unsigned mask = ~((~0)<soid.get_hash() & mask) == child_pgid.m_seed) { + childlog.push_back(*i); + } else { + log.push_back(*i); + } + oldlog.erase(i++); + } + + return pg_log_t( + head, + tail, + can_rollback_to, + rollback_info_trimmed_to, + std::move(childlog)); + } + + mempool::osd::list rewind_from_head(eversion_t newhead) { + assert(newhead >= tail); + + mempool::osd::list::iterator p = log.end(); + mempool::osd::list divergent; + while (true) { + if (p == log.begin()) { + // yikes, the whole thing is divergent! + ::swap(divergent, log); + break; + } + --p; + if (p->version.version <= newhead.version) { + /* + * look at eversion.version here. we want to avoid a situation like: + * our log: 100'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529 + * new log: 122'10 (0'0) m 10000004d3a.00000000/head by client4225.1:18529 + * lower_bound = 100'9 + * i.e, same request, different version. If the eversion.version is > the + * lower_bound, we it is divergent. + */ + ++p; + divergent.splice(divergent.begin(), log, p, log.end()); + break; + } + assert(p->version > newhead); + } + head = newhead; + + if (can_rollback_to > newhead) + can_rollback_to = newhead; + + if (rollback_info_trimmed_to > newhead) + rollback_info_trimmed_to = newhead; + + return divergent; + } + bool empty() const { return log.empty(); } @@ -2970,7 +3062,7 @@ WRITE_CLASS_ENCODER(pg_log_t) inline ostream& operator<<(ostream& out, const pg_log_t& log) { out << "log((" << log.tail << "," << log.head << "], crt=" - << log.can_rollback_to << ")"; + << log.get_can_rollback_to() << ")"; return out; } diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc index 035c489e677..4c2bb06793a 100644 --- a/src/test/osd/TestPGLog.cc +++ b/src/test/osd/TestPGLog.cc @@ -1972,19 +1972,9 @@ TEST_F(PGLogTest, filter_log_1) { const string hit_set_namespace("internal"); - ObjectStore::Transaction t; - pg_info_t info; - list remove_snap; - //bool dirty_info = false; - //bool dirty_big_info = false; - - hobject_t divergent_object; - eversion_t divergent_version; - eversion_t prior_version; - eversion_t newhead; { pg_log_entry_t e; - e.mod_desc.mark_unrollbackable(); + e.mark_unrollbackable(); e.op = pg_log_entry_t::MODIFY; e.soid.pool = pool_id; @@ -2024,12 +2014,22 @@ TEST_F(PGLogTest, filter_log_1) { ASSERT_EQ(total, num_objects); // Some should be removed - log.filter_log(pgid, *osdmap, hit_set_namespace); + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } EXPECT_LE(log.log.size(), (size_t)total); // If we filter a second time, there should be the same total total = log.log.size(); - log.filter_log(pgid, *osdmap, hit_set_namespace); + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } EXPECT_EQ(log.log.size(), (size_t)total); // Increase pg_num as if there would be a split @@ -2046,7 +2046,12 @@ TEST_F(PGLogTest, filter_log_1) { ASSERT_EQ(ret, 0); // We should have fewer entries after a filter - log.filter_log(pgid, *osdmap, hit_set_namespace); + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } EXPECT_LE(log.log.size(), (size_t)total); // Make sure all internal entries are retained -- 2.39.5