From: Yan, Zheng Date: Tue, 9 Jul 2019 10:15:35 +0000 (+0800) Subject: mds: track projected inode/fnode in Mutation X-Git-Tag: v17.0.0~1622^2~3 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e4732810bc5878c0df4dad74b3fa0871b962e45c;p=ceph.git mds: track projected inode/fnode in Mutation Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 0970e86e132c4..235b432107287 100755 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1217,7 +1217,7 @@ void CDir::resync_accounted_rstat() } } -void CDir::assimilate_dirty_rstat_inodes() +void CDir::assimilate_dirty_rstat_inodes(MutationRef& mut) { dout(10) << __func__ << dendl; for (elist::iterator p = dirty_rstat_inodes.begin_use_current(); @@ -1227,16 +1227,18 @@ void CDir::assimilate_dirty_rstat_inodes() if (in->is_frozen()) continue; - auto pi = in->project_inode(); + mut->auth_pin(in); + + auto pi = in->project_inode(mut); pi.inode->version = in->pre_dirty(); - inode->mdcache->project_rstat_inode_to_frag(in, this, 0, 0, NULL); + inode->mdcache->project_rstat_inode_to_frag(mut, in, this, 0, 0, nullptr); } state_set(STATE_ASSIMRSTAT); dout(10) << __func__ << " done" << dendl; } -void CDir::assimilate_dirty_rstat_inodes_finish(MutationRef& mut, EMetaBlob *blob) +void CDir::assimilate_dirty_rstat_inodes_finish(EMetaBlob *blob) { if (!state_test(STATE_ASSIMRSTAT)) return; @@ -1252,9 +1254,6 @@ void CDir::assimilate_dirty_rstat_inodes_finish(MutationRef& mut, EMetaBlob *blo CDentry *dn = in->get_projected_parent_dn(); - mut->auth_pin(in); - mut->add_projected_inode(in); - in->clear_dirty_rstat(); blob->add_primary_dentry(dn, in, true); } @@ -1379,10 +1378,13 @@ void CDir::finish_waiting(uint64_t mask, int result) // dirty/clean -CDir::fnode_ptr CDir::project_fnode() +CDir::fnode_ptr CDir::project_fnode(const MutationRef& mut) { ceph_assert(get_version() != 0); + if (mut && mut->is_projected(this)) + return std::const_pointer_cast(projected_fnode.back()); + auto pf = allocate_fnode(*get_projected_fnode()); if (scrub_infop && scrub_infop->last_scrub_dirty) { @@ -1395,23 +1397,26 @@ CDir::fnode_ptr CDir::project_fnode() } projected_fnode.emplace_back(pf); + if (mut) + mut->add_projected_node(this); dout(10) << __func__ << " " << pf.get() << dendl; return pf; } -void CDir::pop_and_dirty_projected_fnode(LogSegment *ls) +void CDir::pop_and_dirty_projected_fnode(LogSegment *ls, const MutationRef& mut) { ceph_assert(!projected_fnode.empty()); auto pf = std::move(projected_fnode.front()); dout(15) << __func__ << " " << pf.get() << " v" << pf->version << dendl; projected_fnode.pop_front(); + if (mut) + mut->remove_projected_node(this); reset_fnode(std::move(pf)); _mark_dirty(ls); } - version_t CDir::pre_dirty(version_t min) { if (min > projected_version) @@ -1428,7 +1433,8 @@ void CDir::mark_dirty(LogSegment *ls, version_t pv) if (pv) { ceph_assert(get_version() < pv); ceph_assert(pv <= projected_version); - ceph_assert(!projected_fnode.empty()); + ceph_assert(!projected_fnode.empty() && + pv <= projected_fnode.front()->version); } _mark_dirty(ls); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 2c186b55b7bf2..8edcc4a38b368 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -228,8 +228,8 @@ public: void resync_accounted_fragstat(); void resync_accounted_rstat(); - void assimilate_dirty_rstat_inodes(); - void assimilate_dirty_rstat_inodes_finish(MutationRef& mut, EMetaBlob *blob); + void assimilate_dirty_rstat_inodes(MutationRef& mut); + void assimilate_dirty_rstat_inodes_finish(EMetaBlob *blob); void mark_exporting() { state_set(CDir::STATE_EXPORTING); @@ -275,9 +275,9 @@ public: return const_cast(projected_fnode.back().get()); } - fnode_ptr project_fnode(); + fnode_ptr project_fnode(const MutationRef& mut); - void pop_and_dirty_projected_fnode(LogSegment *ls); + void pop_and_dirty_projected_fnode(LogSegment *ls, const MutationRef& mut); bool is_projected() const { return !projected_fnode.empty(); } version_t pre_dirty(version_t min=0); void _mark_dirty(LogSegment *ls); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 74599fac9c120..69cd3bdab0198 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -419,8 +419,15 @@ void CInode::clear_dirty_rstat() } } -CInode::projected_inode CInode::project_inode(bool xattr, bool snap) +CInode::projected_inode CInode::project_inode(const MutationRef& mut, + bool xattr, bool snap) { + if (mut && mut->is_projected(this)) { + ceph_assert(!xattr && !snap); + auto _inode = std::const_pointer_cast(projected_nodes.back().inode); + return projected_inode(std::move(_inode), xattr_map_ptr()); + } + auto pi = allocate_inode(*get_projected_inode()); if (scrub_infop && scrub_infop->last_scrub_dirty) { @@ -445,18 +452,21 @@ CInode::projected_inode CInode::project_inode(bool xattr, bool snap) } projected_nodes.emplace_back(pi, xattr ? px : ox , ps); - + if (mut) + mut->add_projected_node(this); dout(15) << __func__ << " " << pi->ino << dendl; return projected_inode(std::move(pi), std::move(px), ps); } -void CInode::pop_and_dirty_projected_inode(LogSegment *ls) +void CInode::pop_and_dirty_projected_inode(LogSegment *ls, const MutationRef& mut) { ceph_assert(!projected_nodes.empty()); auto front = std::move(projected_nodes.front()); dout(15) << __func__ << " v" << front.inode->version << dendl; projected_nodes.pop_front(); + if (mut) + mut->remove_projected_node(this); bool pool_update = get_inode()->layout.pool_id != front.inode->layout.pool_id; bool pin_update = get_inode()->export_pin != front.inode->export_pin; @@ -2347,7 +2357,7 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, MutationRef mut(new MutationImpl()); mut->ls = mdlog->get_current_segment(); - auto pf = dir->project_fnode(); + auto pf = dir->project_fnode(mut); std::string_view ename; switch (lock->get_type()) { @@ -2364,7 +2374,7 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, if (!is_auth() && lock->get_state() == LOCK_MIX) { dout(10) << __func__ << " try to assimilate dirty rstat on " << *dir << dendl; - dir->assimilate_dirty_rstat_inodes(); + dir->assimilate_dirty_rstat_inodes(mut); } break; @@ -2384,7 +2394,7 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, !is_auth() && lock->get_state() == LOCK_MIX) { dout(10) << __func__ << " finish assimilating dirty rstat on " << *dir << dendl; - dir->assimilate_dirty_rstat_inodes_finish(mut, &le->metablob); + dir->assimilate_dirty_rstat_inodes_finish(&le->metablob); if (!(pf->rstat == pf->accounted_rstat)) { if (!mut->is_wrlocked(&nestlock)) { @@ -2397,7 +2407,6 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, } pf->version = dir->pre_dirty(); - mut->add_projected_fnode(dir); mdlog->submit_entry(le, new C_Inode_FragUpdate(this, dir, mut)); } else { @@ -2434,7 +2443,7 @@ void CInode::_finish_frag_update(CDir *dir, MutationRef& mut) * un-stale. */ /* for more info on scatterlocks, see comments by Locker::scatter_writebehind */ -void CInode::finish_scatter_gather_update(int type) +void CInode::finish_scatter_gather_update(int type, MutationRef& mut) { LogChannelRef clog = mdcache->mds->clog; @@ -2469,10 +2478,12 @@ void CInode::finish_scatter_gather_update(int type) } CDir::fnode_const_ptr pf; - if (update) - pf = dir->project_fnode(); - else + if (update) { + mut->auth_pin(dir); + pf = dir->project_fnode(mut); + } else { pf = dir->get_projected_fnode(); + } if (pf->accounted_fragstat.version == pi->dirstat.version - 1) { dout(20) << fg << " fragstat " << pf->fragstat << dendl; @@ -2499,6 +2510,7 @@ void CInode::finish_scatter_gather_update(int type) auto _pf = const_cast(pf.get()); _pf->accounted_fragstat = _pf->fragstat; _pf->fragstat.version = _pf->accounted_fragstat.version = pi->dirstat.version; + _pf->version = dir->pre_dirty(); dout(10) << fg << " updated accounted_fragstat " << pf->fragstat << " on " << *dir << dendl; } @@ -2584,17 +2596,19 @@ void CInode::finish_scatter_gather_update(int type) } CDir::fnode_const_ptr pf; - if (update) - pf = dir->project_fnode(); - else + if (update) { + mut->auth_pin(dir); + pf = dir->project_fnode(mut); + } else { pf = dir->get_projected_fnode(); + } if (pf->accounted_rstat.version == pi->rstat.version-1) { // only pull this frag's dirty rstat inodes into the frag if // the frag is non-stale and updateable. if it's stale, // that info will just get thrown out! if (update) - dir->assimilate_dirty_rstat_inodes(); + dir->assimilate_dirty_rstat_inodes(mut); dout(20) << fg << " rstat " << pf->rstat << dendl; dout(20) << fg << " accounted_rstat " << pf->accounted_rstat << dendl; @@ -2613,8 +2627,9 @@ void CInode::finish_scatter_gather_update(int type) if (update) { auto _pf = const_cast(pf.get()); _pf->accounted_rstat = pf->rstat; - dir->dirty_old_rstat.clear(); _pf->rstat.version = _pf->accounted_rstat.version = pi->rstat.version; + _pf->version = dir->pre_dirty(); + dir->dirty_old_rstat.clear(); dir->check_rstats(); dout(10) << fg << " updated accounted_rstat " << pf->rstat << " on " << *dir << dendl; } @@ -2663,7 +2678,7 @@ void CInode::finish_scatter_gather_update(int type) } } -void CInode::finish_scatter_gather_update_accounted(int type, MutationRef& mut, EMetaBlob *metablob) +void CInode::finish_scatter_gather_update_accounted(int type, EMetaBlob *metablob) { dout(10) << __func__ << " " << type << " on " << *this << dendl; ceph_assert(is_auth()); @@ -2677,15 +2692,11 @@ void CInode::finish_scatter_gather_update_accounted(int type, MutationRef& mut, continue; // nothing to do. if (type == CEPH_LOCK_INEST) - dir->assimilate_dirty_rstat_inodes_finish(mut, metablob); + dir->assimilate_dirty_rstat_inodes_finish(metablob); dout(10) << " journaling updated frag accounted_ on " << *dir << dendl; ceph_assert(dir->is_projected()); - auto pf = dir->_get_projected_fnode(); - pf->version = dir->pre_dirty(); - mut->add_projected_fnode(dir); metablob->add_dir(dir, true); - mut->auth_pin(dir); } } diff --git a/src/mds/CInode.h b/src/mds/CInode.h index c9d014067370d..fc03c7a575a85 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -524,12 +524,13 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter &ack) { dout(10) << "file_update_finish on " << *in << dendl; - in->pop_and_dirty_projected_inode(mut->ls); mut->apply(); @@ -2793,7 +2792,7 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); - auto pi = in->project_inode(); + auto pi = in->project_inode(mut); pi.inode->version = in->pre_dirty(); if (update_max) { @@ -3535,13 +3534,13 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll CInode::mempool_inode *i; if (oi) { dout(10) << " writing into old inode" << dendl; - auto pi = in->project_inode(); + auto pi = in->project_inode(mut); pi.inode->version = in->pre_dirty(); i = &oi->inode; if (xattrs) px = &oi->xattrs; } else { - auto pi = in->project_inode(xattrs); + auto pi = in->project_inode(mut, xattrs); pi.inode->version = in->pre_dirty(); i = pi.inode.get(); if (xattrs) @@ -3810,12 +3809,12 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, m->xattrbl.length() && m->head.xattr_version > in->get_projected_inode()->xattr_version; - auto pi = in->project_inode(xattr); - pi.inode->version = in->pre_dirty(); - MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); + auto pi = in->project_inode(mut, xattr); + pi.inode->version = in->pre_dirty(); + _update_cap_fields(in, dirty, m, pi.inode.get()); if (change_max) { @@ -4877,10 +4876,10 @@ void Locker::scatter_writebehind(ScatterLock *lock) in->pre_cow_old_inode(); // avoid cow mayhem - auto pi = in->project_inode(); + auto pi = in->project_inode(mut); pi.inode->version = in->pre_dirty(); - in->finish_scatter_gather_update(lock->get_type()); + in->finish_scatter_gather_update(lock->get_type(), mut); lock->start_flush(); EUpdate *le = new EUpdate(mds->mdlog, "scatter_writebehind"); @@ -4889,7 +4888,7 @@ void Locker::scatter_writebehind(ScatterLock *lock) mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY); mdcache->journal_dirty_inode(mut.get(), &le->metablob, in); - in->finish_scatter_gather_update_accounted(lock->get_type(), mut, &le->metablob); + in->finish_scatter_gather_update_accounted(lock->get_type(), &le->metablob); mds->mdlog->submit_entry(le, new C_Locker_ScatterWB(this, lock, mut)); } @@ -4898,7 +4897,8 @@ void Locker::scatter_writebehind_finish(ScatterLock *lock, MutationRef& mut) { CInode *in = static_cast(lock->get_parent()); dout(10) << "scatter_writebehind_finish on " << *lock << " on " << *in << dendl; - in->pop_and_dirty_projected_inode(mut->ls); + + mut->apply(); lock->finish_flush(); @@ -4914,7 +4914,6 @@ void Locker::scatter_writebehind_finish(ScatterLock *lock, MutationRef& mut) } } - mut->apply(); drop_locks(mut.get()); mut->cleanup(); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index ede52ede7424f..5e84a7c7736eb 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1021,16 +1021,6 @@ void MDCache::try_subtree_merge(CDir *dir) } } -class C_MDC_SubtreeMergeWB : public MDCacheLogContext { - CInode *in; - MutationRef mut; -public: - C_MDC_SubtreeMergeWB(MDCache *mdc, CInode *i, MutationRef& m) : MDCacheLogContext(mdc), in(i), mut(m) {} - void finish(int r) override { - mdcache->subtree_merge_writebehind_finish(in, mut); - } -}; - void MDCache::try_subtree_merge_at(CDir *dir, set *to_eval, bool adjust_pop) { dout(10) << "try_subtree_merge_at " << *dir << dendl; @@ -1082,18 +1072,6 @@ void MDCache::try_subtree_merge_at(CDir *dir, set *to_eval, bool adjust } } -void MDCache::subtree_merge_writebehind_finish(CInode *in, MutationRef& mut) -{ - dout(10) << "subtree_merge_writebehind_finish on " << in << dendl; - in->pop_and_dirty_projected_inode(mut->ls); - - mut->apply(); - mds->locker->drop_locks(mut.get()); - mut->cleanup(); - - in->auth_unpin(this); -} - void MDCache::eval_subtree_root(CInode *diri) { // evaluate subtree inode filelock? @@ -1818,7 +1796,8 @@ void MDCache::journal_dirty_inode(MutationImpl *mut, EMetaBlob *metablob, CInode // nested --------------------------------------------------------------- -void MDCache::project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first, +void MDCache::project_rstat_inode_to_frag(const MutationRef& mut, + CInode *cur, CDir *parent, snapid_t first, int linkunlink, SnapRealm *prealm) { CDentry *parentdn = cur->get_projected_parent_dn(); @@ -1861,7 +1840,7 @@ void MDCache::project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t fi } // hacky const CInode::mempool_inode *pi; - if (update && cur->is_projected()) { + if (update && mut->is_projected(cur)) { pi = cur->_get_projected_inode(); } else { pi = cur->get_projected_inode().get(); @@ -2232,9 +2211,8 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, // inode -> dirfrag mut->auth_pin(parent); - mut->add_projected_fnode(parent); - auto pf = parent->project_fnode(); + auto pf = parent->project_fnode(mut); pf->version = parent->pre_dirty(); if (do_parent_mtime || linkunlink) { @@ -2303,7 +2281,7 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, parent->resync_accounted_rstat(); // now push inode rstats into frag - project_rstat_inode_to_frag(cur, parent, first, linkunlink, prealm); + project_rstat_inode_to_frag(mut, cur, parent, first, linkunlink, prealm); cur->clear_dirty_rstat(); } @@ -2358,12 +2336,11 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, // dirfrag -> diri mut->auth_pin(pin); - mut->add_projected_inode(pin); lsi.push_front(pin); pin->pre_cow_old_inode(); // avoid cow mayhem! - auto pi = pin->project_inode(); + auto pi = pin->project_inode(mut); pi.inode->version = pin->pre_dirty(); // dirstat @@ -6363,8 +6340,8 @@ void MDCache::queue_file_recover(CInode *in) s.erase(*s.rbegin()); dout(10) << " snaps in [" << in->first << "," << in->last << "] are " << s << dendl; if (s.size() > 1) { - CInode::mempool_inode pi = in->project_inode(); - pi->version = in->pre_dirty(); + auto pi = in->project_inode(mut); + pi.inode.version = in->pre_dirty(); auto mut(std::make_shared()); mut->ls = mds->mdlog->get_current_segment(); @@ -6394,7 +6371,6 @@ void MDCache::queue_file_recover(CInode *in) void MDCache::_queued_file_recover_cow(CInode *in, MutationRef& mut) { - in->pop_and_dirty_projected_inode(mut->ls); mut->apply(); mds->locker->drop_locks(mut.get()); mut->cleanup(); @@ -6579,16 +6555,15 @@ void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) ceph_assert(p != ls->truncating_inodes.end()); ls->truncating_inodes.erase(p); + MutationRef mut(new MutationImpl()); + mut->ls = mds->mdlog->get_current_segment(); + // update - auto pi = in->project_inode(); + auto pi = in->project_inode(mut); pi.inode->version = in->pre_dirty(); pi.inode->truncate_from = 0; pi.inode->truncate_pending--; - MutationRef mut(new MutationImpl()); - mut->ls = mds->mdlog->get_current_segment(); - mut->add_projected_inode(in); - EUpdate *le = new EUpdate(mds->mdlog, "truncate finish"); mds->mdlog->start_entry(le); @@ -11982,7 +11957,7 @@ void MDCache::dispatch_fragment_dir(MDRequestRef& mdr) // dft lock if (diri->is_auth()) { // journal dirfragtree - auto pi = diri->project_inode(); + auto pi = diri->project_inode(mdr); pi.inode->version = diri->pre_dirty(); predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY); journal_dirty_inode(mdr.get(), &le->metablob, diri); @@ -12020,9 +11995,6 @@ void MDCache::_fragment_logged(MDRequestRef& mdr) << " on " << *diri << dendl; mdr->mark_event("prepare logged"); - if (diri->is_auth()) - diri->pop_and_dirty_projected_inode(mdr->ls); - mdr->apply(); // mark scatterlock // store resulting frags @@ -12426,8 +12398,7 @@ void MDCache::rollback_uncommitted_fragments() } if (diri_auth) { - auto pi = diri->project_inode(); - mut->add_projected_inode(diri); + auto pi = diri->project_inode(mut); pi.inode->version = diri->pre_dirty(); predirty_journal_parents(mut, &le->metablob, diri, 0, PREDIRTY_PRIMARY); le->metablob.add_primary_dentry(diri->get_projected_parent_dn(), diri, true); @@ -13075,10 +13046,9 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) return; } - auto _pf = dir->project_fnode(); + auto _pf = dir->project_fnode(mdr); _pf->version = dir->pre_dirty(); pf = _pf; - mdr->add_projected_fnode(dir); mdr->ls = mds->mdlog->get_current_segment(); EUpdate *le = new EUpdate(mds->mdlog, "repair_dirfrag"); @@ -13235,8 +13205,7 @@ void MDCache::upgrade_inode_snaprealm_work(MDRequestRef& mdr) return; // project_snaprealm() upgrades snaprealm format - auto pi = in->project_inode(false, true); - mdr->add_projected_inode(in); + auto pi = in->project_inode(mdr, false, true); pi.inode->version = in->pre_dirty(); mdr->ls = mds->mdlog->get_current_segment(); diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 1f1c74613831b..13764f054b005 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -314,7 +314,6 @@ class MDCache { void map_dirfrag_set(const list& dfs, set& result); void try_subtree_merge(CDir *root); void try_subtree_merge_at(CDir *root, set *to_eval, bool adjust_pop=true); - void subtree_merge_writebehind_finish(CInode *in, MutationRef& mut); void eval_subtree_root(CInode *diri); CDir *get_subtree_root(CDir *dir); CDir *get_projected_subtree_root(CDir *dir); @@ -409,7 +408,8 @@ class MDCache { CInode **pcow_inode=0, CDentry::linkage_t *dnl=0); void journal_dirty_inode(MutationImpl *mut, EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP); - void project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first, + void project_rstat_inode_to_frag(const MutationRef& mut, + CInode *cur, CDir *parent, snapid_t first, int linkunlink, SnapRealm *prealm); void _project_rstat_inode_to_frag(const CInode::mempool_inode* inode, snapid_t ofirst, snapid_t last, CDir *parent, int linkunlink, bool update_inode); diff --git a/src/mds/Mutation.cc b/src/mds/Mutation.cc index a5d7b0be057d2..69fe4b2732f07 100644 --- a/src/mds/Mutation.cc +++ b/src/mds/Mutation.cc @@ -14,6 +14,7 @@ #include "Mutation.h" #include "ScatterLock.h" +#include "CInode.h" #include "CDir.h" // MutationImpl @@ -215,33 +216,6 @@ void MutationImpl::_clear_remote_auth_pinned(ObjectState &stat) --num_remote_auth_pins; } -void MutationImpl::add_projected_inode(CInode *in) -{ - projected_inodes.push_back(in); -} - -void MutationImpl::pop_and_dirty_projected_inodes() -{ - while (!projected_inodes.empty()) { - CInode *in = projected_inodes.front(); - projected_inodes.pop_front(); - in->pop_and_dirty_projected_inode(ls); - } -} - -void MutationImpl::add_projected_fnode(CDir *dir) -{ - projected_fnodes.push_back(dir); -} - -void MutationImpl::pop_and_dirty_projected_fnodes() -{ - for (const auto& dir : projected_fnodes) { - dir->pop_and_dirty_projected_fnode(ls); - } - projected_fnodes.clear(); -} - void MutationImpl::add_updated_lock(ScatterLock *lock) { updated_locks.push_back(lock); @@ -256,24 +230,34 @@ void MutationImpl::add_cow_inode(CInode *in) void MutationImpl::add_cow_dentry(CDentry *dn) { pin(dn); - dirty_cow_dentries.push_back(pair(dn, dn->get_projected_version())); + dirty_cow_dentries.emplace_back(dn, dn->get_projected_version()); } void MutationImpl::apply() { - pop_and_dirty_projected_inodes(); - - for (const auto& in : dirty_cow_inodes) + for (auto& obj : projected_nodes) { + if (CInode *in = dynamic_cast(obj)) + in->pop_and_dirty_projected_inode(ls, nullptr); + } + + for (const auto& in : dirty_cow_inodes) { in->_mark_dirty(ls); + } - for (const auto& [dn, v] : dirty_cow_dentries) + for (const auto& [dn, v] : dirty_cow_dentries) { dn->mark_dirty(v, ls); + } - pop_and_dirty_projected_fnodes(); + for (auto& obj : projected_nodes) { + if (CDir *dir = dynamic_cast(obj)) + dir->pop_and_dirty_projected_fnode(ls, nullptr); + } for (const auto& lock : updated_locks) { lock->mark_dirty(); } + + projected_nodes.clear(); } void MutationImpl::cleanup() diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index 9bbb4b87c2b18..1c5b95371882e 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -201,10 +201,15 @@ public: void set_remote_auth_pinned(MDSCacheObject* object, mds_rank_t from); void _clear_remote_auth_pinned(ObjectState& stat); - void add_projected_inode(CInode *in); - void pop_and_dirty_projected_inodes(); - void add_projected_fnode(CDir *dir); - void pop_and_dirty_projected_fnodes(); + void add_projected_node(MDSCacheObject* obj) { + projected_nodes.insert(obj); + } + void remove_projected_node(MDSCacheObject* obj) { + projected_nodes.erase(obj); + } + bool is_projected(MDSCacheObject *obj) const { + return projected_nodes.count(obj); + } void add_updated_lock(ScatterLock *lock); void add_cow_inode(CInode *in); void add_cow_dentry(CDentry *dn); @@ -256,8 +261,7 @@ public: bool killed = false; // for applying projected inode changes - std::list projected_inodes; - std::vector projected_fnodes; + std::set projected_nodes; std::list updated_locks; std::list dirty_cow_inodes; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 2413b4cd786af..17d973c04b16f 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4709,7 +4709,6 @@ public: ceph_assert(r == 0); // apply - in->pop_and_dirty_projected_inode(mdr->ls); mdr->apply(); MDSRank *mds = get_mds(); @@ -4962,7 +4961,7 @@ void Server::handle_client_setattr(MDRequestRef& mdr) EUpdate *le = new EUpdate(mdlog, "setattr"); mdlog->start_entry(le); - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); if (mask & CEPH_SETATTR_UID) pi.inode->uid = req->head.args.setattr.uid; @@ -5043,7 +5042,7 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) mdlog->start_entry(le); // prepare - auto pi = in->project_inode(); + auto pi = in->project_inode(mdr); pi.inode->version = in->pre_dirty(); pi.inode->mtime = pi.inode->ctime = mdr->get_op_stamp(); if (mdr->get_op_stamp() > pi.inode->rstat.rctime) @@ -5159,7 +5158,7 @@ void Server::handle_client_setlayout(MDRequestRef& mdr) return; // project update - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); pi.inode->layout = layout; // add the old pool to the inode pi.inode->add_old_pool(old_layout.pool_id); @@ -5287,7 +5286,7 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) if (!check_access(mdr, cur, access)) return; - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); pi.inode->layout = layout; pi.inode->version = cur->pre_dirty(); @@ -5531,7 +5530,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (check_layout_vxattr(mdr, rest, value, &layout) < 0) return; - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); pi.inode->layout = layout; mdr->no_early_reply = true; pip = pi.inode.get(); @@ -5555,7 +5554,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!mds->locker->acquire_locks(mdr, lov)) return; - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); int64_t old_pool = pi.inode->layout.pool_id; pi.inode->add_old_pool(old_pool); pi.inode->layout = layout; @@ -5581,7 +5580,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur, false, new_realm)) return; - auto pi = cur->project_inode(false, new_realm); + auto pi = cur->project_inode(mdr, false, new_realm); pi.inode->quota = quota; if (new_realm) { @@ -5615,7 +5614,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur)) return; - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); cur->set_export_pin(rank); pip = pi.inode.get(); } else if (name == "ceph.dir.pin.random"sv) { @@ -5644,7 +5643,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur)) return; - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); cur->setxattr_ephemeral_rand(val); pip = pi.inode.get(); } else if (name == "ceph.dir.pin.distributed"sv) { @@ -5665,7 +5664,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur)) return; - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); cur->setxattr_ephemeral_dist(val); pip = pi.inode.get(); } else { @@ -5723,7 +5722,7 @@ void Server::handle_remove_vxattr(MDRequestRef& mdr, CInode *cur) if (!mds->locker->acquire_locks(mdr, lov)) return; - auto pi = cur->project_inode(); + auto pi = cur->project_inode(mdr); pi.inode->clear_layout(); pi.inode->version = cur->pre_dirty(); @@ -5751,26 +5750,6 @@ void Server::handle_remove_vxattr(MDRequestRef& mdr, CInode *cur) respond_to_request(mdr, -ENODATA); } -class C_MDS_inode_xattr_update_finish : public ServerLogContext { - CInode *in; -public: - - C_MDS_inode_xattr_update_finish(Server *s, MDRequestRef& r, CInode *i) : - ServerLogContext(s, r), in(i) { } - void finish(int r) override { - ceph_assert(r == 0); - - // apply - in->pop_and_dirty_projected_inode(mdr->ls); - - mdr->apply(); - - get_mds()->balancer->hit_inode(in, META_POP_IWR); - - server->respond_to_request(mdr, 0); - } -}; - void Server::handle_client_setxattr(MDRequestRef& mdr) { const cref_t &req = mdr->client_request; @@ -5844,7 +5823,7 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) dout(10) << "setxattr '" << name << "' len " << len << " on " << *cur << dendl; // project update - auto pi = cur->project_inode(true); + auto pi = cur->project_inode(mdr, true); pi.inode->version = cur->pre_dirty(); pi.inode->ctime = mdr->get_op_stamp(); if (mdr->get_op_stamp() > pi.inode->rstat.rctime) @@ -5912,7 +5891,7 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) dout(10) << "removexattr '" << name << "' on " << *cur << dendl; // project update - auto pi = cur->project_inode(true); + auto pi = cur->project_inode(mdr, true); auto &px = *pi.xattrs; pi.inode->version = cur->pre_dirty(); pi.inode->ctime = mdr->get_op_stamp(); @@ -6351,7 +6330,7 @@ void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti) version_t tipv = targeti->pre_dirty(); // project inode update - auto pi = targeti->project_inode(); + auto pi = targeti->project_inode(mdr); pi.inode->nlink++; pi.inode->ctime = mdr->get_op_stamp(); if (mdr->get_op_stamp() > pi.inode->rstat.rctime) @@ -6395,8 +6374,6 @@ void Server::_link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti, dn->mark_dirty(dnpv, mdr->ls); // target inode - targeti->pop_and_dirty_projected_inode(mdr->ls); - mdr->apply(); MDRequestRef null_ref; @@ -6608,7 +6585,7 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) EPeerUpdate::OP_PREPARE, EPeerUpdate::LINK); mdlog->start_entry(le); - auto pi = dnl->get_inode()->project_inode(); + auto pi = dnl->get_inode()->project_inode(mdr); // update journaled target inode bool inc; @@ -6689,7 +6666,6 @@ void Server::_logged_peer_link(MDRequestRef& mdr, CInode *targeti, bool adjust_r ceph_assert(g_conf()->mds_kill_link_at != 6); // update the target - targeti->pop_and_dirty_projected_inode(mdr->ls); mdr->apply(); // hit pop @@ -6794,14 +6770,12 @@ void Server::do_link_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& dout(10) << " target is " << *in << dendl; ceph_assert(!in->is_projected()); // live peer request hold versionlock xlock. - auto pi = in->project_inode(); + auto pi = in->project_inode(mut); pi.inode->version = in->pre_dirty(); - mut->add_projected_inode(in); // parent dir rctime CDir *parent = in->get_projected_parent_dn()->get_dir(); - auto pf = parent->project_fnode(); - mut->add_projected_fnode(parent); + auto pf = parent->project_fnode(mut); pf->version = parent->pre_dirty(); if (pf->fragstat.mtime == pi.inode->ctime) { pf->fragstat.mtime = rollback.old_dir_mtime; @@ -7101,7 +7075,7 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn) // the unlinked dentry dn->pre_dirty(); - auto pi = in->project_inode(); + auto pi = in->project_inode(mdr); { std::string t; dn->make_path_string(t, true); @@ -7132,7 +7106,6 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn) pi.inode->update_backtrace(); le->metablob.add_primary_dentry(straydn, in, true, true); } else { - mdr->add_projected_inode(in); // remote link. update remote inode. mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_DIR, -1); mdcache->predirty_journal_parents(mdr, &le->metablob, in, 0, PREDIRTY_PRIMARY); @@ -7184,18 +7157,15 @@ void Server::_unlink_local_finish(MDRequestRef& mdr, // unlink main dentry dn->get_dir()->unlink_inode(dn); dn->pop_projected_linkage(); + dn->mark_dirty(dnpv, mdr->ls); // relink as stray? (i.e. was primary link?) if (straydn) { dout(20) << " straydn is " << *straydn << dendl; straydn->pop_projected_linkage(); - - strayin->pop_and_dirty_projected_inode(mdr->ls); - mdcache->touch_dentry_bottom(straydn); } - dn->mark_dirty(dnpv, mdr->ls); mdr->apply(); mdcache->send_dentry_unlink(dn, straydn, mdr); @@ -8287,7 +8257,7 @@ void Server::_rename_prepare(MDRequestRef& mdr, ceph_assert(straydn); // moving to straydn. // link--, and move. if (destdn->is_auth()) { - auto pi= oldin->project_inode(); //project_snaprealm + auto pi= oldin->project_inode(mdr); //project_snaprealm pi.inode->version = straydn->pre_dirty(pi.inode->version); pi.inode->update_backtrace(); tpi = pi.inode.get(); @@ -8296,7 +8266,7 @@ void Server::_rename_prepare(MDRequestRef& mdr, } else if (destdnl->is_remote()) { // nlink-- targeti if (oldin->is_auth()) { - auto pi = oldin->project_inode(); + auto pi = oldin->project_inode(mdr); pi.inode->version = oldin->pre_dirty(); tpi = pi.inode.get(); } @@ -8312,14 +8282,14 @@ void Server::_rename_prepare(MDRequestRef& mdr, destdn->push_projected_linkage(srcdnl->get_remote_ino(), srcdnl->get_remote_d_type()); // srci if (srci->is_auth()) { - auto pi = srci->project_inode(); + auto pi = srci->project_inode(mdr); pi.inode->version = srci->pre_dirty(); spi = pi.inode.get(); } } else { dout(10) << " will merge remote onto primary link" << dendl; if (destdn->is_auth()) { - auto pi = oldin->project_inode(); + auto pi = oldin->project_inode(mdr); pi.inode->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldin->get_version()); spi = pi.inode.get(); } @@ -8343,7 +8313,7 @@ void Server::_rename_prepare(MDRequestRef& mdr, dout(10) << " noting renamed dir open frags " << metablob->renamed_dir_frags << dendl; } } - auto pi = srci->project_inode(); // project snaprealm if srcdnl->is_primary + auto pi = srci->project_inode(mdr); // project snaprealm if srcdnl->is_primary // & srcdnl->snaprealm pi.inode->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv); pi.inode->update_backtrace(); @@ -8609,13 +8579,13 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C // nlink-- targeti if (destdn->is_auth()) - oldin->pop_and_dirty_projected_inode(mdr->ls); + oldin->pop_and_dirty_projected_inode(mdr->ls, mdr); mdcache->touch_dentry_bottom(straydn); // drop dn as quickly as possible. } else if (destdnl->is_remote()) { destdn->get_dir()->unlink_inode(destdn, false); if (oldin->is_auth()) { - oldin->pop_and_dirty_projected_inode(mdr->ls); + oldin->pop_and_dirty_projected_inode(mdr->ls, mdr); } else if (mdr->peer_request) { if (mdr->peer_request->desti_snapbl.length() > 0) { ceph_assert(oldin->snaprealm); @@ -8666,7 +8636,7 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C destdn->mark_dirty(mdr->more()->pvmap[destdn], mdr->ls); // in if (in->is_auth()) { - in->pop_and_dirty_projected_inode(mdr->ls); + in->pop_and_dirty_projected_inode(mdr->ls, mdr); } else if (mdr->peer_request) { if (mdr->peer_request->srci_snapbl.length() > 0) { ceph_assert(in->snaprealm); @@ -8678,7 +8648,7 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C } } else { dout(10) << "merging remote onto primary link" << dendl; - oldin->pop_and_dirty_projected_inode(mdr->ls); + oldin->pop_and_dirty_projected_inode(mdr->ls, mdr); } } else { // primary if (linkmerge) { @@ -8729,7 +8699,7 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C } if (destdn->is_auth()) - in->pop_and_dirty_projected_inode(mdr->ls); + in->pop_and_dirty_projected_inode(mdr->ls, mdr); } // src @@ -9222,8 +9192,7 @@ static void _rollback_repair_dir(MutationRef& mut, CDir *dir, rename_rollback::drec &r, utime_t ctime, bool isdir, const nest_info_t &rstat) { - auto pf = dir->project_fnode(); - mut->add_projected_fnode(dir); + auto pf = dir->project_fnode(mut); pf->version = dir->pre_dirty(); if (isdir) { @@ -9376,12 +9345,10 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef bool projected; CDir *pdir = in->get_projected_parent_dir(); if (pdir->authority().first == whoami) { - auto pi = in->project_inode(); - mut->add_projected_inode(in); + auto pi = in->project_inode(mut); pi.inode->version = in->pre_dirty(); if (pdir != srcdir) { - auto pf = pdir->project_fnode(); - mut->add_projected_fnode(pdir); + auto pf = pdir->project_fnode(mut); pf->version = pdir->pre_dirty(); } if (pi.inode->ctime == rollback.ctime) @@ -9449,12 +9416,10 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef CInode::inode_ptr ti; CDir *pdir = target->get_projected_parent_dir(); if (pdir->authority().first == whoami) { - auto pi = target->project_inode(); - mut->add_projected_inode(target); + auto pi = target->project_inode(mut); pi.inode->version = target->pre_dirty(); if (pdir != srcdir) { - auto pf = pdir->project_fnode(); - mut->add_projected_fnode(pdir); + auto pf = pdir->project_fnode(mut); pf->version = pdir->pre_dirty(); } ti = pi.inode; @@ -9966,7 +9931,7 @@ void Server::handle_client_mksnap(MDRequestRef& mdr) info.name = snapname; info.stamp = mdr->get_op_stamp(); - auto pi = diri->project_inode(false, true); + auto pi = diri->project_inode(mdr, false, true); pi.inode->ctime = info.stamp; if (info.stamp > pi.inode->rstat.rctime) pi.inode->rstat.rctime = info.stamp; @@ -10004,7 +9969,6 @@ void Server::_mksnap_finish(MDRequestRef& mdr, CInode *diri, SnapInfo &info) int op = (diri->snaprealm? CEPH_SNAP_OP_CREATE : CEPH_SNAP_OP_SPLIT); - diri->pop_and_dirty_projected_inode(mdr->ls); mdr->apply(); mds->snapclient->commit(mdr->more()->stid, mdr->ls); @@ -10104,7 +10068,7 @@ void Server::handle_client_rmsnap(MDRequestRef& mdr) ceph_assert(mds->snapclient->get_cached_version() >= stid); // journal - auto pi = diri->project_inode(false, true); + auto pi = diri->project_inode(mdr, false, true); pi.inode->version = diri->pre_dirty(); pi.inode->ctime = mdr->get_op_stamp(); if (mdr->get_op_stamp() > pi.inode->rstat.rctime) @@ -10139,7 +10103,6 @@ void Server::_rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid) snapid_t seq; decode(seq, p); - diri->pop_and_dirty_projected_inode(mdr->ls); mdr->apply(); mds->snapclient->commit(stid, mdr->ls); @@ -10248,7 +10211,7 @@ void Server::handle_client_renamesnap(MDRequestRef& mdr) ceph_assert(mds->snapclient->get_cached_version() >= stid); // journal - auto pi = diri->project_inode(false, true); + auto pi = diri->project_inode(mdr, false, true); pi.inode->ctime = mdr->get_op_stamp(); if (mdr->get_op_stamp() > pi.inode->rstat.rctime) pi.inode->rstat.rctime = mdr->get_op_stamp(); @@ -10280,7 +10243,6 @@ void Server::_renamesnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid { dout(10) << "_renamesnap_finish " << *mdr << " " << snapid << dendl; - diri->pop_and_dirty_projected_inode(mdr->ls); mdr->apply(); mds->snapclient->commit(mdr->more()->stid, mdr->ls); diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index 7e4e2e13e8c5b..ee2a99e30635a 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -181,8 +181,7 @@ void StrayManager::_purge_stray_purged( MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); - auto pi = in->project_inode(); - mut->add_projected_inode(in); + auto pi = in->project_inode(mut); pi.inode->size = 0; pi.inode->max_size_ever = 0; pi.inode->client_ranges.clear(); @@ -191,8 +190,7 @@ void StrayManager::_purge_stray_purged( pi.inode->version = in->pre_dirty(); CDir *dir = dn->get_dir(); - auto pf = dir->project_fnode(); - mut->add_projected_fnode(dir); + auto pf = dir->project_fnode(mut); pf->version = dir->pre_dirty(); EUpdate *le = new EUpdate(mds->mdlog, "purge_stray truncate"); @@ -228,8 +226,7 @@ void StrayManager::_purge_stray_purged( // update dirfrag fragstat, rstat CDir *dir = dn->get_dir(); - auto pf = dir->project_fnode(); - mut->add_projected_fnode(dir); + auto pf = dir->project_fnode(mut); pf->version = dir->pre_dirty(); if (in->is_dir()) pf->fragstat.nsubdirs--; diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index fd8d2e5379650..56d7adea7456b 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -450,11 +450,9 @@ private: state |= fullbit::STATE_EPHEMERAL_RANDOM; } - // make note of where this inode was last journaled - in->last_journaled = event_seq; - //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; - const auto& pi = in->get_projected_inode(); + ceph_assert(pi->version > 0); + if ((state & fullbit::STATE_DIRTY) && pi->is_backtrace_updated()) state |= fullbit::STATE_DIRTYPARENT; @@ -467,6 +465,10 @@ private: lump.add_dfull(dn->get_name(), dn->first, dn->last, dn->get_projected_version(), pi, in->dirfragtree, in->get_projected_xattrs(), in->symlink, in->oldest_snap, snapbl, state, in->get_old_inodes()); + + // make note of where this inode was last journaled + in->last_journaled = event_seq; + //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; } // convenience: primary or remote? figure it out.