From f249dc13de89f31f5a54ab3e6d05d357ede14fd4 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Fri, 4 Nov 2022 08:58:46 -0400 Subject: [PATCH] mds: obviate MDLog::start_entry The major problem here is that the MDLog::_start_entry method puts the current event sequence number in the EMetaBlob of the event (if present). Because of this, no other event can be submitted between start_entry and submit_entry, as that would invalidate the event sequence. Instead, fix up the event sequence during submission and simplify related logic that uses it during EMetaBlob construction. Secondarily, for the purposes of this commit series, _start_entry introduced recursive locks when generating the ESubtreeMap within MDLog::_segment_upkeep. So, this commit is a necessary cleanup. Signed-off-by: Patrick Donnelly --- src/mds/CInode.cc | 1 - src/mds/Locker.cc | 5 ---- src/mds/MDCache.cc | 29 ++++++++-------------- src/mds/MDLog.cc | 50 +++++++++++++++----------------------- src/mds/MDLog.h | 31 +++++++---------------- src/mds/MDSTableClient.cc | 2 +- src/mds/MDSTableServer.cc | 6 ++--- src/mds/Migrator.cc | 8 +++--- src/mds/Server.cc | 38 +++-------------------------- src/mds/StrayManager.cc | 2 -- src/mds/events/EMetaBlob.h | 16 ++++++------ src/mds/journal.cc | 8 +++--- 12 files changed, 62 insertions(+), 134 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index e6cd03442bbd7..839d5fb656685 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -2409,7 +2409,6 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, } EUpdate *le = new EUpdate(mdlog, ename); - mdlog->start_entry(le); le->metablob.add_dir_context(dir); le->metablob.add_dir(dir, true); diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 4e6930be47558..557b830594106 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2878,7 +2878,6 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, metablob = &eu->metablob; le = eu; } - mds->mdlog->start_entry(le); mdcache->predirty_journal_parents(mut, metablob, 
in, 0, PREDIRTY_PRIMARY); // no cow, here! @@ -2989,7 +2988,6 @@ void Locker::adjust_cap_wanted(Capability *cap, int wanted, int issue_seq) if (mdcache->open_file_table.should_log_open(cur)) { ceph_assert(cur->last == CEPH_NOSNAP); EOpen *le = new EOpen(mds->mdlog); - mds->mdlog->start_entry(le); le->add_clean_inode(cur); mds->mdlog->submit_entry(le); } @@ -3603,7 +3601,6 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll } EUpdate *le = new EUpdate(mds->mdlog, "snap flush"); - mds->mdlog->start_entry(le); MutationRef mut = new MutationImpl(); mut->ls = mds->mdlog->get_current_segment(); @@ -3899,7 +3896,6 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, // do the update. EUpdate *le = new EUpdate(mds->mdlog, "cap update"); - mds->mdlog->start_entry(le); bool xattr = (dirty & CEPH_CAP_XATTR_EXCL) && m->xattrbl.length() && @@ -5005,7 +5001,6 @@ void Locker::scatter_writebehind(ScatterLock *lock) lock->start_flush(); EUpdate *le = new EUpdate(mds->mdlog, "scatter_writebehind"); - mds->mdlog->start_entry(le); mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mut.get(), &le->metablob, in); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index d4ba1e53a4741..f4ff185056a86 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -532,7 +532,6 @@ void MDCache::_create_system_file(CDir *dir, std::string_view name, CInode *in, mut->ls = mds->mdlog->get_current_segment(); EUpdate *le = new EUpdate(mds->mdlog, "create system file"); - mds->mdlog->start_entry(le); if (!in->is_mdsdir()) { predirty_journal_parents(mut, &le->metablob, in, dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); @@ -2106,8 +2105,6 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, bool do_parent_mtime = flags & PREDIRTY_DIR; bool shallow = flags & PREDIRTY_SHALLOW; - ceph_assert(mds->mdlog->entry_is_open()); - // make sure stamp is set if (mut->get_mds_stamp() == utime_t()) 
mut->set_mds_stamp(ceph_clock_now()); @@ -2391,8 +2388,7 @@ void MDCache::log_leader_commit(metareqid_t reqid) { dout(10) << "log_leader_commit " << reqid << dendl; uncommitted_leaders[reqid].committing = true; - mds->mdlog->start_submit_entry(new ECommitted(reqid), - new C_MDC_CommittedLeader(this, reqid)); + mds->mdlog->submit_entry(new ECommitted(reqid), new C_MDC_CommittedLeader(this, reqid)); } void MDCache::_logged_leader_commit(metareqid_t reqid) @@ -2509,7 +2505,6 @@ ESubtreeMap *MDCache::create_subtree_map() show_subtrees(); ESubtreeMap *le = new ESubtreeMap(); - mds->mdlog->_start_entry(le); map dirs_to_add; @@ -3385,7 +3380,7 @@ void MDCache::handle_resolve_ack(const cref_t &ack) ceph_assert(su); // log commit - mds->mdlog->start_submit_entry(new EPeerUpdate(mds->mdlog, "unknown", p.first, from, + mds->mdlog->submit_entry(new EPeerUpdate(mds->mdlog, "unknown", p.first, from, EPeerUpdate::OP_COMMIT, su->origop), new C_MDC_PeerCommit(this, from, p.first)); mds->mdlog->flush(); @@ -3598,7 +3593,7 @@ void MDCache::disambiguate_my_imports() dout(10) << "ambiguous import auth known, must not be me " << *dir << dendl; cancel_ambiguous_import(dir); - mds->mdlog->start_submit_entry(new EImportFinish(dir, false)); + mds->mdlog->submit_entry(new EImportFinish(dir, false)); // subtree may have been swallowed by another node claiming dir // as their own. 
@@ -3610,7 +3605,7 @@ void MDCache::disambiguate_my_imports() } else { dout(10) << "ambiguous import auth unclaimed, must be me " << *dir << dendl; finish_ambiguous_import(q->first); - mds->mdlog->start_submit_entry(new EImportFinish(dir, true)); + mds->mdlog->submit_entry(new EImportFinish(dir, true)); } } ceph_assert(my_ambiguous_imports.empty()); @@ -5438,7 +5433,7 @@ bool MDCache::process_imported_caps() finish->session_map); ESessions *le = new ESessions(pv, std::move(rejoin_client_map), std::move(rejoin_client_metadata_map)); - mds->mdlog->start_submit_entry(le, finish); + mds->mdlog->submit_entry(le, finish); mds->mdlog->flush(); rejoin_client_map.clear(); rejoin_client_metadata_map.clear(); @@ -6288,7 +6283,6 @@ void MDCache::queue_file_recover(CInode *in) auto mut(std::make_shared()); mut->ls = mds->mdlog->get_current_segment(); EUpdate *le = new EUpdate(mds->mdlog, "queue_file_recover cow"); - mds->mdlog->start_entry(le); predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY); s.erase(*s.begin()); @@ -6634,7 +6628,6 @@ void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) pi.inode->fscrypt_last_block = bufferlist(); EUpdate *le = new EUpdate(mds->mdlog, "truncate finish"); - mds->mdlog->start_entry(le); predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY); journal_dirty_inode(mut.get(), &le->metablob, in); @@ -6748,7 +6741,7 @@ void MDCache::purge_inodes(const interval_set& inos, LogSegment *ls) mds->inotable->project_release_ids(inos); version_t piv = mds->inotable->get_projected_version(); ceph_assert(piv != 0); - mds->mdlog->start_submit_entry(new EPurged(inos, ls->seq, piv), + mds->mdlog->submit_entry(new EPurged(inos, ls->seq, piv), new C_MDS_purge_completed_finish(this, inos, ls, piv)); mds->mdlog->flush(); }); @@ -7959,7 +7952,8 @@ bool MDCache::shutdown_pass() if (ls->num_events > 1 || !ls->dirty_dirfrags.empty()) { // Current segment contains events other than subtreemap or // there are dirty dirfrags 
(see CDir::log_mark_dirty()) - mds->mdlog->start_new_segment(); + auto sle = create_subtree_map(); + mds->mdlog->submit_entry(sle); mds->mdlog->flush(); } } @@ -12066,7 +12060,6 @@ void MDCache::dispatch_fragment_dir(MDRequestRef& mdr) mdr->ls = mds->mdlog->get_current_segment(); EFragment *le = new EFragment(mds->mdlog, EFragment::OP_PREPARE, basedirfrag, info.bits); - mds->mdlog->start_entry(le); for (const auto& dir : info.dirs) { dirfrag_rollback rollback; @@ -12210,7 +12203,7 @@ void MDCache::_fragment_stored(MDRequestRef& mdr) // journal commit EFragment *le = new EFragment(mds->mdlog, EFragment::OP_COMMIT, basedirfrag, info.bits); - mds->mdlog->start_submit_entry(le, new C_MDC_FragmentCommit(this, basedirfrag, mdr)); + mds->mdlog->submit_entry(le, new C_MDC_FragmentCommit(this, basedirfrag, mdr)); // unfreeze resulting frags @@ -12280,7 +12273,7 @@ void MDCache::_fragment_old_purged(dirfrag_t basedirfrag, int bits, const MDRequ mdr->mark_event("old frags purged"); EFragment *le = new EFragment(mds->mdlog, EFragment::OP_FINISH, basedirfrag, bits); - mds->mdlog->start_submit_entry(le); + mds->mdlog->submit_entry(le); finish_uncommitted_fragment(basedirfrag, EFragment::OP_FINISH); @@ -12495,7 +12488,6 @@ void MDCache::rollback_uncommitted_fragments() MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); EFragment *le = new EFragment(mds->mdlog, EFragment::OP_ROLLBACK, p->first, uf.bits); - mds->mdlog->start_entry(le); bool diri_auth = (diri->authority() != CDIR_AUTH_UNDEF); frag_vec_t old_frags; @@ -13181,7 +13173,6 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) mdr->ls = mds->mdlog->get_current_segment(); EUpdate *le = new EUpdate(mds->mdlog, "repair_dirfrag"); - mds->mdlog->start_entry(le); if (!good_fragstat) { if (pf->fragstat.mtime > frag_info.mtime) diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 5e1540862d302..c131da5e4f0da 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -267,37 +267,22 @@ void 
MDLog::append() // ------------------------------------------------- -void MDLog::_start_entry(LogEvent *e) +void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase* c) { + dout(20) << __func__ << " " << *le << dendl; + ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock)); ceph_assert(ceph_mutex_is_locked_by_me(submit_mutex)); - - ceph_assert(cur_event == NULL); - cur_event = e; + ceph_assert(!mds->is_any_replay()); + ceph_assert(!mds_is_shutting_down); event_seq++; - EMetaBlob *metablob = e->get_metablob(); + EMetaBlob *metablob = le->get_metablob(); if (metablob) { - metablob->event_seq = event_seq; - metablob->last_subtree_map = get_last_segment_seq(); + for (auto& in : metablob->get_touched_inodes()) { + in->last_journaled = event_seq; + } } -} - -void MDLog::cancel_entry(LogEvent *le) -{ - ceph_assert(le == cur_event); - cur_event = NULL; - delete le; -} - -void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase *c) -{ - ceph_assert(ceph_mutex_is_locked_by_me(submit_mutex)); - ceph_assert(!mds->is_any_replay()); - ceph_assert(!mds_is_shutting_down); - - ceph_assert(le == cur_event); - cur_event = NULL; // let the event register itself in the segment ceph_assert(!segments.empty()); @@ -318,8 +303,14 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase *c) } unflushed++; - +} + +void MDLog::_segment_upkeep(LogEvent* le) +{ + ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock)); + ceph_assert(ceph_mutex_is_locked_by_me(submit_mutex)); uint64_t period = journaler->get_layout_period(); + auto ls = get_current_segment(); // start a new segment? if (le->get_type() == EVENT_SUBTREEMAP || (le->get_type() == EVENT_IMPORTFINISH && mds->is_resolve())) { @@ -327,16 +318,15 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase *c) // do not insert ESubtreeMap among EImportFinish events that finish // disambiguate imports. Because the ESubtreeMap reflects the subtree // state when all EImportFinish events are replayed. 
- } else if (ls->end/period != ls->offset/period || - ls->num_events >= events_per_segment) { + } else if (ls->end/period != ls->offset/period || ls->num_events >= events_per_segment) { dout(10) << "submit_entry also starting new segment: last = " - << ls->seq << "/" << ls->offset << ", event seq = " << event_seq << dendl; + << ls->seq << "/" << ls->offset << ", event seq = " << event_seq << dendl; _start_new_segment(); - } else if (debug_subtrees && - le->get_type() != EVENT_SUBTREEMAP_TEST) { + } else if (debug_subtrees && le->get_type() != EVENT_SUBTREEMAP_TEST) { // debug: journal this every time to catch subtree replay bugs. // use a different event id so it doesn't get interpreted as a // LogSegment boundary on replay. + dout(10) << __func__ << ": creating test subtree map" << dendl; LogEvent *sle = mds->mdcache->create_subtree_map(); sle->set_type(EVENT_SUBTREEMAP_TEST); _submit_entry(sle, NULL); diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index d5e7c5a8a7df2..0e3d26687a2ac 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -130,31 +130,23 @@ public: Journaler *get_journaler() { return journaler; } bool empty() const { return segments.empty(); } + uint64_t get_last_segment_seq() const { + ceph_assert(!segments.empty()); + return segments.rbegin()->first; + } + bool is_capped() const { return mds_is_shutting_down; } void cap(); void kick_submitter(); void shutdown(); - void _start_entry(LogEvent *e); - void start_entry(LogEvent *e) { - std::lock_guard l(submit_mutex); - _start_entry(e); - } - void cancel_entry(LogEvent *e); - void _submit_entry(LogEvent *e, MDSLogContextBase *c); - void submit_entry(LogEvent *e, MDSLogContextBase *c = 0) { - std::lock_guard l(submit_mutex); - _submit_entry(e, c); - submit_cond.notify_all(); - } - void start_submit_entry(LogEvent *e, MDSLogContextBase *c = 0) { + void submit_entry(LogEvent *e, MDSLogContextBase* c = 0) { std::lock_guard l(submit_mutex); - _start_entry(e); _submit_entry(e, c); + _segment_upkeep(e); 
submit_cond.notify_all(); } - bool entry_is_open() const { return cur_event != NULL; } void wait_for_safe(Context* c); void flush(); @@ -250,10 +242,6 @@ protected: void _submit_thread(); - uint64_t get_last_segment_seq() const { - ceph_assert(!segments.empty()); - return segments.rbegin()->first; - } LogSegment *get_oldest_segment() { return segments.begin()->second; } @@ -303,7 +291,9 @@ private: // -- segments -- void _start_new_segment(); void _prepare_new_segment(); + void _segment_upkeep(LogEvent* le); void _journal_segment_subtree_map(MDSContext *onsync); + void _submit_entry(LogEvent *e, MDSLogContextBase *c); void try_to_commit_open_file_table(uint64_t last_seq); @@ -313,9 +303,6 @@ private: void _trim_expired_segments(); void write_head(MDSContext *onfinish); - // -- events -- - LogEvent *cur_event = nullptr; - bool debug_subtrees; uint64_t events_per_segment; int64_t max_events; diff --git a/src/mds/MDSTableClient.cc b/src/mds/MDSTableClient.cc index 9ded20bb8a86a..fc8103ee99289 100644 --- a/src/mds/MDSTableClient.cc +++ b/src/mds/MDSTableClient.cc @@ -117,7 +117,7 @@ void MDSTableClient::handle_request(const cref_t &m) pending_commit.erase(tid); // log ACK. 
- mds->mdlog->start_submit_entry(new ETableClient(table, TABLESERVER_OP_ACK, tid), + mds->mdlog->submit_entry(new ETableClient(table, TABLESERVER_OP_ACK, tid), new C_LoggedAck(this, tid)); } else { dout(10) << "got stray ack on tid " << tid << ", ignoring" << dendl; diff --git a/src/mds/MDSTableServer.cc b/src/mds/MDSTableServer.cc index 6e542b4e39656..5514f1e784784 100644 --- a/src/mds/MDSTableServer.cc +++ b/src/mds/MDSTableServer.cc @@ -64,7 +64,6 @@ void MDSTableServer::handle_prepare(const cref_t &req) ETableServer *le = new ETableServer(table, TABLESERVER_OP_PREPARE, req->reqid, from, projected_version, projected_version); - mds->mdlog->start_entry(le); le->mutation = req->bl; mds->mdlog->submit_entry(le, new C_Prepare(this, req, projected_version)); mds->mdlog->flush(); @@ -148,7 +147,7 @@ void MDSTableServer::handle_commit(const cref_t &req) projected_version++; committing_tids.insert(tid); - mds->mdlog->start_submit_entry(new ETableServer(table, TABLESERVER_OP_COMMIT, 0, MDS_RANK_NONE, + mds->mdlog->submit_entry(new ETableServer(table, TABLESERVER_OP_COMMIT, 0, MDS_RANK_NONE, tid, projected_version), new C_Commit(this, req)); } @@ -206,7 +205,7 @@ void MDSTableServer::handle_rollback(const cref_t &req) projected_version++; committing_tids.insert(tid); - mds->mdlog->start_submit_entry(new ETableServer(table, TABLESERVER_OP_ROLLBACK, 0, MDS_RANK_NONE, + mds->mdlog->submit_entry(new ETableServer(table, TABLESERVER_OP_ROLLBACK, 0, MDS_RANK_NONE, tid, projected_version), new C_Rollback(this, req)); } @@ -245,7 +244,6 @@ void MDSTableServer::do_server_update(bufferlist& bl) projected_version++; ETableServer *le = new ETableServer(table, TABLESERVER_OP_SERVER_UPDATE, 0, MDS_RANK_NONE, 0, projected_version); - mds->mdlog->start_entry(le); le->mutation = bl; mds->mdlog->submit_entry(le, new C_ServerUpdate(this, bl)); } diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 8bd875c3450ef..1dd4cb17737ef 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc 
@@ -1905,7 +1905,6 @@ void Migrator::handle_export_ack(const cref_t &m) // log completion. // include export bounds, to ensure they're in the journal. EExport *le = new EExport(mds->mdlog, dir, it->second.peer);; - mds->mdlog->start_entry(le); le->metablob.add_dir_context(dir, EMetaBlob::TO_ROOT); le->metablob.add_dir(dir, false); @@ -2670,7 +2669,6 @@ void Migrator::handle_export_dir(const cref_t &m) // start the journal entry EImportStart *le = new EImportStart(mds->mdlog, dir->dirfrag(), m->bounds, oldauth); - mds->mdlog->start_entry(le); le->metablob.add_dir_context(dir); @@ -2910,7 +2908,7 @@ void Migrator::import_reverse(CDir *dir) } // log our failure - mds->mdlog->start_submit_entry(new EImportFinish(dir, false)); // log failure + mds->mdlog->submit_entry(new EImportFinish(dir, false)); // log failure mdcache->trim(num_dentries); // try trimming dentries @@ -3129,7 +3127,7 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last) MutationRef mut = it->second.mut; import_state.erase(it); - mds->mdlog->start_submit_entry(new EImportFinish(dir, true)); + mds->mdlog->submit_entry(new EImportFinish(dir, true)); // process delayed expires mdcache->process_delayed_expire(dir); @@ -3638,7 +3636,7 @@ void Migrator::handle_export_caps(const cref_t &ex) // journal open client sessions ESessions *le = new ESessions(pv, std::move(client_map), std::move(client_metadata_map)); - mds->mdlog->start_submit_entry(le, finish); + mds->mdlog->submit_entry(le, finish); mds->mdlog->flush(); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 6d91d5fcda227..91dd0a3bd7d62 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -759,7 +759,7 @@ void Server::handle_client_session(const cref_t &m) ceph_assert(r == 0); log_session_status("ACCEPTED", ""); }); - mdlog->start_submit_entry(new ESession(m->get_source_inst(), true, pv, client_metadata), + mdlog->submit_entry(new ESession(m->get_source_inst(), true, pv, client_metadata), new C_MDS_session_finish(this, 
session, sseq, true, pv, fin)); mdlog->flush(); } @@ -1438,7 +1438,7 @@ void Server::journal_close_session(Session *session, int state, Context *on_safe auto le = new ESession(session->info.inst, false, pv, inos_to_free, piv, session->delegated_inos); auto fin = new C_MDS_session_finish(this, session, sseq, false, pv, inos_to_free, piv, session->delegated_inos, mdlog->get_current_segment(), on_safe); - mdlog->start_submit_entry(le, fin); + mdlog->submit_entry(le, fin); mdlog->flush(); // clean up requests, too @@ -4525,7 +4525,6 @@ void Server::handle_client_open(MDRequestRef& mdr) if (cur->is_auth() && cur->last == CEPH_NOSNAP && mdcache->open_file_table.should_log_open(cur)) { EOpen *le = new EOpen(mds->mdlog); - mdlog->start_entry(le); le->add_clean_inode(cur); mdlog->submit_entry(le); } @@ -4727,7 +4726,6 @@ void Server::handle_client_openc(MDRequestRef& mdr) // prepare finisher mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "openc"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); @@ -5366,7 +5364,6 @@ void Server::handle_client_setattr(MDRequestRef& mdr) // project update mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "setattr"); - mdlog->start_entry(le); auto pi = cur->project_inode(mdr); @@ -5458,7 +5455,6 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "open_truncate"); - mdlog->start_entry(le); // prepare auto pi = in->project_inode(mdr); @@ -5591,7 +5587,6 @@ void Server::handle_client_setlayout(MDRequestRef& mdr) // log + wait mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "setlayout"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), 
req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); @@ -5713,7 +5708,6 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) // log + wait mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "setlayout"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); @@ -6307,7 +6301,6 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) // log + wait mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "set vxattr layout"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); @@ -6352,7 +6345,6 @@ void Server::handle_remove_vxattr(MDRequestRef& mdr, CInode *cur) // log + wait mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "remove dir layout vxattr"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); @@ -6641,7 +6633,6 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) // log + wait mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "setxattr"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); @@ -6711,7 +6702,6 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) // log + 
wait mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "removexattr"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); @@ -7101,7 +7091,6 @@ void Server::handle_client_mknod(MDRequestRef& mdr) // prepare finisher mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "mknod"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); @@ -7184,7 +7173,6 @@ void Server::handle_client_mkdir(MDRequestRef& mdr) // prepare finisher mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "mkdir"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); @@ -7269,7 +7257,6 @@ void Server::handle_client_symlink(MDRequestRef& mdr) // prepare finisher mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "symlink"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); @@ -7472,7 +7459,6 @@ void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti, SnapRe // log + wait EUpdate *le = new EUpdate(mdlog, "link_local"); - mdlog->start_entry(le); le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, dn->get_dir(), PREDIRTY_DIR, 1); // new dn mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, 0, 
PREDIRTY_PRIMARY); // targeti @@ -7583,7 +7569,6 @@ void Server::_link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targ // add to event mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, inc ? "link_remote":"unlink_remote"); - mdlog->start_entry(le); le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); if (!mdr->more()->witnessed.empty()) { dout(20) << " noting uncommitted_peers " << mdr->more()->witnessed << dendl; @@ -7713,8 +7698,6 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) mdr->ls = mdlog->get_current_segment(); EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_link_prep", mdr->reqid, mdr->peer_to_mds, EPeerUpdate::OP_PREPARE, EPeerUpdate::LINK); - mdlog->start_entry(le); - auto pi = dnl->get_inode()->project_inode(mdr); // update journaled target inode @@ -7846,7 +7829,6 @@ void Server::_commit_peer_link(MDRequestRef& mdr, int r, CInode *targeti) // write a commit to the journal EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_link_commit", mdr->reqid, mdr->peer_to_mds, EPeerUpdate::OP_COMMIT, EPeerUpdate::LINK); - mdlog->start_entry(le); submit_mdlog_entry(le, new C_MDS_CommittedPeer(this, mdr), mdr, __func__); mdlog->flush(); } else { @@ -7949,7 +7931,6 @@ void Server::do_link_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& // journal it EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_link_rollback", rollback.reqid, leader, EPeerUpdate::OP_ROLLBACK, EPeerUpdate::LINK); - mdlog->start_entry(le); le->commit.add_dir_context(parent); le->commit.add_dir(parent, true); le->commit.add_primary_dentry(in->get_projected_parent_dn(), 0, true); @@ -8212,7 +8193,6 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn) // prepare log entry EUpdate *le = new EUpdate(mdlog, "unlink_local"); - mdlog->start_entry(le); le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); if (!mdr->more()->witnessed.empty()) { dout(20) << " noting 
uncommitted_peers " << mdr->more()->witnessed << dendl; @@ -8470,7 +8450,6 @@ void Server::handle_peer_rmdir_prep(MDRequestRef& mdr) mdr->ls = mdlog->get_current_segment(); EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_rmdir", mdr->reqid, mdr->peer_to_mds, EPeerUpdate::OP_PREPARE, EPeerUpdate::RMDIR); - mdlog->start_entry(le); le->rollback = mdr->more()->rollback_bl; le->commit.add_dir_context(straydn->get_dir()); @@ -8569,7 +8548,6 @@ void Server::_commit_peer_rmdir(MDRequestRef& mdr, int r, CDentry *straydn) EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_rmdir_commit", mdr->reqid, mdr->peer_to_mds, EPeerUpdate::OP_COMMIT, EPeerUpdate::RMDIR); - mdlog->start_entry(le); submit_mdlog_entry(le, new C_MDS_CommittedPeer(this, mdr), mdr, __func__); mdlog->flush(); } else { @@ -8645,7 +8623,6 @@ void Server::do_rmdir_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_rmdir_rollback", rollback.reqid, leader, EPeerUpdate::OP_ROLLBACK, EPeerUpdate::RMDIR); - mdlog->start_entry(le); le->commit.add_dir_context(dn->get_dir()); le->commit.add_primary_dentry(dn, in, true); @@ -9207,7 +9184,6 @@ void Server::handle_client_rename(MDRequestRef& mdr) // -- prepare journal entry -- mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "rename"); - mdlog->start_entry(le); le->metablob.add_client_req(mdr->reqid, req->get_oldest_client_tid()); if (!mdr->more()->witnessed.empty()) { dout(20) << " noting uncommitted_peers " << mdr->more()->witnessed << dendl; @@ -10213,7 +10189,6 @@ void Server::handle_peer_rename_prep(MDRequestRef& mdr) mdr->ls = mdlog->get_current_segment(); EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_rename_prep", mdr->reqid, mdr->peer_to_mds, EPeerUpdate::OP_PREPARE, EPeerUpdate::RENAME); - mdlog->start_entry(le); le->rollback = mdr->more()->rollback_bl; bufferlist blah; // inode import data... 
obviously not used if we're the peer @@ -10221,7 +10196,7 @@ void Server::handle_peer_rename_prep(MDRequestRef& mdr) if (le->commit.empty()) { dout(10) << " empty metablob, skipping journal" << dendl; - mdlog->cancel_entry(le); + delete le; mdr->ls = NULL; _logged_peer_rename(mdr, srcdn, destdn, straydn); } else { @@ -10375,7 +10350,6 @@ void Server::_commit_peer_rename(MDRequestRef& mdr, int r, EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_rename_commit", mdr->reqid, mdr->peer_to_mds, EPeerUpdate::OP_COMMIT, EPeerUpdate::RENAME); - mdlog->start_entry(le); submit_mdlog_entry(le, new C_MDS_CommittedPeer(this, mdr), mdr, __func__); mdlog->flush(); } else { @@ -10719,7 +10693,6 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef // journal it EPeerUpdate *le = new EPeerUpdate(mdlog, "peer_rename_rollback", rollback.reqid, leader, EPeerUpdate::OP_ROLLBACK, EPeerUpdate::RENAME); - mdlog->start_entry(le); if (srcdn && (srcdn->authority().first == whoami || force_journal_src)) { le->commit.add_dir_context(srcdir); @@ -10777,7 +10750,7 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef if (mdr && !mdr->more()->peer_update_journaled) { ceph_assert(le->commit.empty()); - mdlog->cancel_entry(le); + delete le; mut->ls = NULL; _rename_rollback_finish(mut, mdr, srcdn, srcdnpv, destdn, straydn, splits, finish_mdr); } else { @@ -11207,7 +11180,6 @@ void Server::handle_client_mksnap(MDRequestRef& mdr) // journal the inode changes mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "mksnap"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); le->metablob.add_table_transaction(TABLE_SNAP, stid); @@ -11333,7 +11305,6 @@ void Server::handle_client_rmsnap(MDRequestRef& mdr) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "rmsnap"); - mdlog->start_entry(le); // project the snaprealm auto &newnode = *pi.snapnode; @@ -11485,7 
+11456,6 @@ void Server::handle_client_renamesnap(MDRequestRef& mdr) // journal the inode changes mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "renamesnap"); - mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); le->metablob.add_table_transaction(TABLE_SNAP, stid); diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index d5827d48002c5..ec63b1d48b810 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -202,7 +202,6 @@ void StrayManager::_purge_stray_purged( pf->version = dir->pre_dirty(); EUpdate *le = new EUpdate(mds->mdlog, "purge_stray truncate"); - mds->mdlog->start_entry(le); le->metablob.add_dir_context(dir); auto& dl = le->metablob.add_dir(dn->dir, true); @@ -230,7 +229,6 @@ void StrayManager::_purge_stray_purged( dn->push_projected_linkage(); // NULL EUpdate *le = new EUpdate(mds->mdlog, "purge_stray"); - mds->mdlog->start_entry(le); // update dirfrag fragstat, rstat CDir *dir = dn->get_dir(); diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 736a509ea211d..6c995dddeb029 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -340,26 +340,27 @@ private: std::vector > client_reqs; std::vector > client_flushes; + std::set touched; + public: void encode(bufferlist& bl, uint64_t features) const; void decode(bufferlist::const_iterator& bl); void get_inodes(std::set &inodes) const; + const auto& get_touched_inodes(void) const { + return touched; + } void get_paths(std::vector &paths) const; void get_dentries(std::map > &dentries) const; entity_name_t get_client_name() const {return client_name;} void dump(Formatter *f) const; static void generate_test_instances(std::list& ls); - // soft stateadd - uint64_t last_subtree_map; - uint64_t event_seq; // for replay, in certain cases //LogSegment *_segment; EMetaBlob() : opened_ino(0), renamed_dirino(0), - inotablev(0), sessionmapv(0), allocated_ino(0), - 
last_subtree_map(0), event_seq(0) + inotablev(0), sessionmapv(0), allocated_ino(0) {} EMetaBlob(const EMetaBlob&) = delete; ~EMetaBlob() { } @@ -480,7 +481,8 @@ private: state, in->get_old_inodes()); // make note of where this inode was last journaled - in->last_journaled = event_seq; + + touched.insert(in); //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; } @@ -512,7 +514,7 @@ private: } void add_root(bool dirty, CInode *in) { - in->last_journaled = event_seq; + touched.insert(in); //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; const auto& pi = in->get_projected_inode(); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 96e5295ef35da..f4fccad2e5c0b 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -218,7 +218,6 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o dout(20) << "try_to_expire requeueing snap needflush inode " << *in << dendl; if (!le) { le = new EOpen(mds->mdlog); - mds->mdlog->start_entry(le); } le->add_clean_inode(in); ls->open_files.push_back(&in->item_open_file); @@ -391,15 +390,16 @@ void EMetaBlob::add_dir_context(CDir *dir, int mode) } // was the inode journaled in this blob? - if (event_seq && diri->last_journaled == event_seq) { + if (touched.contains(diri)) { dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri this blob " << *diri << dendl; break; } // have we journaled this inode since the last subtree map? 
- if (!maybenot && last_subtree_map && diri->last_journaled >= last_subtree_map) { + auto last_segment_seq = mds->mdlog->get_last_segment_seq(); + if (!maybenot && diri->last_journaled >= last_segment_seq) { dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri in this segment (" - << diri->last_journaled << " >= " << last_subtree_map << "), setting maybenot flag " + << diri->last_journaled << " >= " << last_segment_seq << "), setting maybenot flag " << *diri << dendl; maybenot = true; } -- 2.39.5