From: Yan, Zheng Date: Sat, 14 Jul 2018 08:33:19 +0000 (+0800) Subject: mds: use smart pointer to manage CDir::fnode X-Git-Tag: wip-pdonnell-testing-20200918.022351~527^2~4 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=56656cb07d9613eb7be4e729f7433f982c5a4870;p=ceph-ci.git mds: use smart pointer to manage CDir::fnode Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index 34bc91359cc..cd85d9d9505 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -190,7 +190,7 @@ void CDentry::mark_dirty(version_t pv, LogSegment *ls) _mark_dirty(ls); // mark dir too - dir->mark_dirty(pv, ls); + dir->mark_dirty(ls, pv); } diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index 98c37ca2f32..86efabd76e1 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -237,7 +237,7 @@ public: version_t pre_dirty(version_t min=0); void _mark_dirty(LogSegment *ls); - void mark_dirty(version_t projected_dirv, LogSegment *ls); + void mark_dirty(version_t pv, LogSegment *ls); void mark_clean(); void mark_new(); diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 9916abc4145..0970e86e132 100755 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -46,6 +46,8 @@ int CDir::num_frozen_trees = 0; int CDir::num_freezing_trees = 0; +CDir::fnode_const_ptr CDir::empty_fnode = CDir::allocate_fnode(); + class CDirContext : public MDSContext { protected: @@ -136,22 +138,22 @@ ostream& operator<<(ostream& out, const CDir& dir) if (dir.state_test(CDir::STATE_ASSIMRSTAT)) out << "|assimrstat"; // fragstat - out << " " << dir.fnode.fragstat; - if (!(dir.fnode.fragstat == dir.fnode.accounted_fragstat)) - out << "/" << dir.fnode.accounted_fragstat; + out << " " << dir.get_fnode()->fragstat; + if (!(dir.get_fnode()->fragstat == dir.get_fnode()->accounted_fragstat)) + out << "/" << dir.get_fnode()->accounted_fragstat; if (g_conf()->mds_debug_scatterstat && dir.is_projected()) { - const fnode_t *pf = dir.get_projected_fnode(); + const auto& pf = 
dir.get_projected_fnode(); out << "->" << pf->fragstat; if (!(pf->fragstat == pf->accounted_fragstat)) out << "/" << pf->accounted_fragstat; } // rstat - out << " " << dir.fnode.rstat; - if (!(dir.fnode.rstat == dir.fnode.accounted_rstat)) - out << "/" << dir.fnode.accounted_rstat; + out << " " << dir.get_fnode()->rstat; + if (!(dir.get_fnode()->rstat == dir.get_fnode()->accounted_rstat)) + out << "/" << dir.get_fnode()->accounted_rstat; if (g_conf()->mds_debug_scatterstat && dir.is_projected()) { - const fnode_t *pf = dir.get_projected_fnode(); + const auto& pf = dir.get_projected_fnode(); out << "->" << pf->rstat; if (!(pf->rstat == pf->accounted_rstat)) out << "/" << pf->accounted_rstat; @@ -208,7 +210,8 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : { // auth ceph_assert(in->is_dir()); - if (auth) state_set(STATE_AUTH); + if (auth) + state_set(STATE_AUTH); } /** @@ -248,27 +251,27 @@ bool CDir::check_rstats(bool scrub) bool good = true; // fragstat - if(!frag_info.same_sums(fnode.fragstat)) { + if(!frag_info.same_sums(fnode->fragstat)) { dout(1) << "mismatch between head items and fnode.fragstat! printing dentries" << dendl; dout(1) << "get_num_head_items() = " << get_num_head_items() - << "; fnode.fragstat.nfiles=" << fnode.fragstat.nfiles - << " fnode.fragstat.nsubdirs=" << fnode.fragstat.nsubdirs << dendl; + << "; fnode.fragstat.nfiles=" << fnode->fragstat.nfiles + << " fnode.fragstat.nsubdirs=" << fnode->fragstat.nsubdirs << dendl; good = false; } else { dout(20) << "get_num_head_items() = " << get_num_head_items() - << "; fnode.fragstat.nfiles=" << fnode.fragstat.nfiles - << " fnode.fragstat.nsubdirs=" << fnode.fragstat.nsubdirs << dendl; + << "; fnode.fragstat.nfiles=" << fnode->fragstat.nfiles + << " fnode.fragstat.nsubdirs=" << fnode->fragstat.nsubdirs << dendl; } // rstat - if (!nest_info.same_sums(fnode.rstat)) { + if (!nest_info.same_sums(fnode->rstat)) { dout(1) << "mismatch between child accounted_rstats and my rstats!" 
<< dendl; dout(1) << "total of child dentrys: " << nest_info << dendl; - dout(1) << "my rstats: " << fnode.rstat << dendl; + dout(1) << "my rstats: " << fnode->rstat << dendl; good = false; } else { dout(20) << "total of child dentrys: " << nest_info << dendl; - dout(20) << "my rstats: " << fnode.rstat << dendl; + dout(20) << "my rstats: " << fnode->rstat << dendl; } if (!good) { @@ -283,11 +286,11 @@ bool CDir::check_rstats(bool scrub) } } - ceph_assert(frag_info.nfiles == fnode.fragstat.nfiles); - ceph_assert(frag_info.nsubdirs == fnode.fragstat.nsubdirs); - ceph_assert(nest_info.rbytes == fnode.rstat.rbytes); - ceph_assert(nest_info.rfiles == fnode.rstat.rfiles); - ceph_assert(nest_info.rsubdirs == fnode.rstat.rsubdirs); + ceph_assert(frag_info.nfiles == fnode->fragstat.nfiles); + ceph_assert(frag_info.nsubdirs == fnode->fragstat.nsubdirs); + ceph_assert(nest_info.rbytes == fnode->rstat.rbytes); + ceph_assert(nest_info.rfiles == fnode->rstat.rfiles); + ceph_assert(nest_info.rsubdirs == fnode->rstat.rsubdirs); } } dout(10) << "check_rstats complete on " << this << dendl; @@ -858,22 +861,24 @@ void CDir::steal_dentry(CDentry *dn) } else if (dn->last == CEPH_NOSNAP) { num_head_items++; + auto _fnode = _get_fnode(); + if (dn->get_linkage()->is_primary()) { CInode *in = dn->get_linkage()->get_inode(); const auto& pi = in->get_projected_inode(); if (in->is_dir()) { - fnode.fragstat.nsubdirs++; + _fnode->fragstat.nsubdirs++; if (in->item_pop_lru.is_on_list()) pop_lru_subdirs.push_back(&in->item_pop_lru); } else { - fnode.fragstat.nfiles++; + _fnode->fragstat.nfiles++; } - fnode.rstat.rbytes += pi->accounted_rstat.rbytes; - fnode.rstat.rfiles += pi->accounted_rstat.rfiles; - fnode.rstat.rsubdirs += pi->accounted_rstat.rsubdirs; - fnode.rstat.rsnaps += pi->accounted_rstat.rsnaps; - if (pi->accounted_rstat.rctime > fnode.rstat.rctime) - fnode.rstat.rctime = pi->accounted_rstat.rctime; + _fnode->rstat.rbytes += pi->accounted_rstat.rbytes; + _fnode->rstat.rfiles += 
pi->accounted_rstat.rfiles; + _fnode->rstat.rsubdirs += pi->accounted_rstat.rsubdirs; + _fnode->rstat.rsnaps += pi->accounted_rstat.rsnaps; + if (pi->accounted_rstat.rctime > fnode->rstat.rctime) + _fnode->rstat.rctime = pi->accounted_rstat.rctime; if (in->is_any_caps()) adjust_num_inodes_with_caps(1); @@ -883,9 +888,9 @@ void CDir::steal_dentry(CDentry *dn) dirty_rstat_inodes.push_back(&in->dirty_rstat_item); } else if (dn->get_linkage()->is_remote()) { if (dn->get_linkage()->get_remote_d_type() == DT_DIR) - fnode.fragstat.nsubdirs++; + _fnode->fragstat.nsubdirs++; else - fnode.fragstat.nfiles++; + _fnode->fragstat.nfiles++; } } else { num_snap_items++; @@ -1009,10 +1014,10 @@ void CDir::split(int bits, std::vector* subs, MDSContext::vec& waiters, b nest_info_t rstatdiff; frag_info_t fragstatdiff; - if (fnode.accounted_rstat.version == rstat_version) - rstatdiff.add_delta(fnode.accounted_rstat, fnode.rstat); - if (fnode.accounted_fragstat.version == dirstat_version) - fragstatdiff.add_delta(fnode.accounted_fragstat, fnode.fragstat); + if (fnode->accounted_rstat.version == rstat_version) + rstatdiff.add_delta(fnode->accounted_rstat, fnode->rstat); + if (fnode->accounted_fragstat.version == dirstat_version) + fragstatdiff.add_delta(fnode->accounted_fragstat, fnode->fragstat); dout(10) << " rstatdiff " << rstatdiff << " fragstatdiff " << fragstatdiff << dendl; map dentry_waiters; @@ -1024,7 +1029,6 @@ void CDir::split(int bits, std::vector* subs, MDSContext::vec& waiters, b CDir *f = new CDir(inode, fg, cache, is_auth()); f->state_set(state & (MASK_STATE_FRAGMENT_KEPT | STATE_COMPLETE)); f->get_replicas() = get_replicas(); - f->set_version(get_version()); f->pop_me = pop_me; f->pop_me.scale(fac); @@ -1074,21 +1078,25 @@ void CDir::split(int bits, std::vector* subs, MDSContext::vec& waiters, b // FIXME: handle dirty old rstat // fix up new frag fragstats - for (int i=0; ifnode.rstat.version = rstat_version; - f->fnode.accounted_rstat = f->fnode.rstat; - 
f->fnode.fragstat.version = dirstat_version; - f->fnode.accounted_fragstat = f->fnode.fragstat; - dout(10) << " rstat " << f->fnode.rstat << " fragstat " << f->fnode.fragstat + auto _fnode = f->_get_fnode(); + _fnode->version = f->projected_version = get_version(); + _fnode->rstat.version = rstat_version; + _fnode->accounted_rstat = _fnode->rstat; + _fnode->fragstat.version = dirstat_version; + _fnode->accounted_fragstat = _fnode->fragstat; + dout(10) << " rstat " << _fnode->rstat << " fragstat " << _fnode->fragstat << " on " << *f << dendl; - } - // give any outstanding frag stat differential to first frag - dout(10) << " giving rstatdiff " << rstatdiff << " fragstatdiff" << fragstatdiff - << " to " << *subfrags[0] << dendl; - subfrags[0]->fnode.accounted_rstat.add(rstatdiff); - subfrags[0]->fnode.accounted_fragstat.add(fragstatdiff); + if (i == 0) { + // give any outstanding frag stat differential to first frag + dout(10) << " giving rstatdiff " << rstatdiff << " fragstatdiff" << fragstatdiff + << " to " << *subfrags[0] << dendl; + _fnode->accounted_rstat.add(rstatdiff); + _fnode->accounted_fragstat.add(fragstatdiff); + } + } finish_old_fragment(waiters, replay); } @@ -1109,6 +1117,8 @@ void CDir::merge(const std::vector& subs, MDSContext::vec& waiters, bool prepare_new_fragment(replay); + auto _fnode = _get_fnode(); + nest_info_t rstatdiff; frag_info_t fragstatdiff; bool touched_mtime, touched_chattr; @@ -1121,10 +1131,10 @@ void CDir::merge(const std::vector& subs, MDSContext::vec& waiters, bool dout(10) << " subfrag " << dir->get_frag() << " " << *dir << dendl; ceph_assert(!dir->is_auth() || dir->is_complete() || replay); - if (dir->fnode.accounted_rstat.version == rstat_version) - rstatdiff.add_delta(dir->fnode.accounted_rstat, dir->fnode.rstat); - if (dir->fnode.accounted_fragstat.version == dirstat_version) - fragstatdiff.add_delta(dir->fnode.accounted_fragstat, dir->fnode.fragstat, + if (dir->get_fnode()->accounted_rstat.version == rstat_version) + 
rstatdiff.add_delta(dir->get_fnode()->accounted_rstat, dir->get_fnode()->rstat); + if (dir->get_fnode()->accounted_fragstat.version == dirstat_version) + fragstatdiff.add_delta(dir->get_fnode()->accounted_fragstat, dir->get_fnode()->fragstat, &touched_mtime, &touched_chattr); dir->prepare_old_fragment(dentry_waiters, replay); @@ -1141,8 +1151,8 @@ void CDir::merge(const std::vector& subs, MDSContext::vec& waiters, bool } // merge version - if (dir->get_version() > get_version()) - set_version(dir->get_version()); + if (dir->get_version() > _fnode->version) + _fnode->version = projected_version = dir->get_version(); // merge state state_set(dir->get_state() & MASK_STATE_FRAGMENT_KEPT); @@ -1165,13 +1175,13 @@ void CDir::merge(const std::vector& subs, MDSContext::vec& waiters, bool mark_complete(); // FIXME: merge dirty old rstat - fnode.rstat.version = rstat_version; - fnode.accounted_rstat = fnode.rstat; - fnode.accounted_rstat.add(rstatdiff); + _fnode->rstat.version = rstat_version; + _fnode->accounted_rstat = _fnode->rstat; + _fnode->accounted_rstat.add(rstatdiff); - fnode.fragstat.version = dirstat_version; - fnode.accounted_fragstat = fnode.fragstat; - fnode.accounted_fragstat.add(fragstatdiff); + _fnode->fragstat.version = dirstat_version; + _fnode->accounted_fragstat = _fnode->fragstat; + _fnode->accounted_fragstat.add(fragstatdiff); init_fragment_pins(); } @@ -1181,7 +1191,7 @@ void CDir::merge(const std::vector& subs, MDSContext::vec& waiters, bool void CDir::resync_accounted_fragstat() { - fnode_t *pf = get_projected_fnode(); + auto pf = _get_projected_fnode(); const auto& pi = inode->get_projected_inode(); if (pf->accounted_fragstat.version != pi->dirstat.version) { @@ -1196,7 +1206,7 @@ void CDir::resync_accounted_fragstat() */ void CDir::resync_accounted_rstat() { - fnode_t *pf = get_projected_fnode(); + auto pf = _get_projected_fnode(); const auto& pi = inode->get_projected_inode(); if (pf->accounted_rstat.version != pi->rstat.version) { @@ -1369,32 
+1379,36 @@ void CDir::finish_waiting(uint64_t mask, int result) // dirty/clean -fnode_t *CDir::project_fnode() +CDir::fnode_ptr CDir::project_fnode() { ceph_assert(get_version() != 0); - auto &p = projected_fnode.emplace_back(*get_projected_fnode()); + + auto pf = allocate_fnode(*get_projected_fnode()); if (scrub_infop && scrub_infop->last_scrub_dirty) { - p.localized_scrub_stamp = scrub_infop->last_local.time; - p.localized_scrub_version = scrub_infop->last_local.version; - p.recursive_scrub_stamp = scrub_infop->last_recursive.time; - p.recursive_scrub_version = scrub_infop->last_recursive.version; + pf->localized_scrub_stamp = scrub_infop->last_local.time; + pf->localized_scrub_version = scrub_infop->last_local.version; + pf->recursive_scrub_stamp = scrub_infop->last_recursive.time; + pf->recursive_scrub_version = scrub_infop->last_recursive.version; scrub_infop->last_scrub_dirty = false; scrub_maybe_delete_info(); } - dout(10) << __func__ << " " << &p << dendl; - return &p; + projected_fnode.emplace_back(pf); + dout(10) << __func__ << " " << pf.get() << dendl; + return pf; } void CDir::pop_and_dirty_projected_fnode(LogSegment *ls) { ceph_assert(!projected_fnode.empty()); - auto &front = projected_fnode.front(); - dout(15) << __func__ << " " << &front << " v" << front.version << dendl; - fnode = front; - _mark_dirty(ls); + auto pf = std::move(projected_fnode.front()); + dout(15) << __func__ << " " << pf.get() << " v" << pf->version << dendl; + projected_fnode.pop_front(); + + reset_fnode(std::move(pf)); + _mark_dirty(ls); } @@ -1407,11 +1421,16 @@ version_t CDir::pre_dirty(version_t min) return projected_version; } -void CDir::mark_dirty(version_t pv, LogSegment *ls) +void CDir::mark_dirty(LogSegment *ls, version_t pv) { - ceph_assert(get_version() < pv); - ceph_assert(pv <= projected_version); - fnode.version = pv; + ceph_assert(is_auth()); + + if (pv) { + ceph_assert(get_version() < pv); + ceph_assert(pv <= projected_version); + 
ceph_assert(!projected_fnode.empty()); + } + _mark_dirty(ls); } @@ -1461,8 +1480,10 @@ void CDir::log_mark_dirty() if (is_dirty() || projected_version > get_version()) return; // noop if it is already dirty or will be dirty - version_t pv = pre_dirty(); - mark_dirty(pv, cache->mds->mdlog->get_current_segment()); + auto _fnode = allocate_fnode(*get_fnode()); + _fnode->version = pre_dirty(); + reset_fnode(std::move(_fnode)); + mark_dirty(cache->mds->mdlog->get_current_segment()); } void CDir::mark_complete() { @@ -1516,7 +1537,9 @@ void CDir::fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnab dout(7) << "fetch dirfrag for unlinked directory, mark complete" << dendl; if (get_version() == 0) { ceph_assert(inode->is_auth()); - set_version(1); + auto _fnode = allocate_fnode(); + _fnode->version = 1; + reset_fnode(std::move(_fnode)); if (state_test(STATE_REJOINUNDEF)) { ceph_assert(cache->mds->is_rejoin()); @@ -1939,8 +1962,9 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, if (get_version() == 0) { ceph_assert(!is_projected()); ceph_assert(!state_test(STATE_COMMITTING)); - fnode = got_fnode; - projected_version = committing_version = committed_version = got_fnode.version; + auto _fnode = allocate_fnode(got_fnode); + reset_fnode(std::move(_fnode)); + projected_version = committing_version = committed_version = get_version(); if (state_test(STATE_REJOINUNDEF)) { ceph_assert(cache->mds->is_rejoin()); @@ -1958,13 +1982,13 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, SnapRealm *realm = inode->find_snaprealm(); if (!realm->have_past_parents_open()) { dout(10) << " no snap purge, one or more past parents NOT open" << dendl; - } else if (fnode.snap_purged_thru < realm->get_last_destroyed()) { + } else if (fnode->snap_purged_thru < realm->get_last_destroyed()) { snaps = &realm->get_snaps(); - dout(10) << " snap_purged_thru " << fnode.snap_purged_thru + dout(10) << " snap_purged_thru " << fnode->snap_purged_thru << " < " << 
realm->get_last_destroyed() << ", snap purge based on " << *snaps << dendl; if (get_num_snap_items() == 0) { - fnode.snap_purged_thru = realm->get_last_destroyed(); + const_cast(fnode->snap_purged_thru) = realm->get_last_destroyed(); force_dirty = true; } } @@ -2073,8 +2097,11 @@ void CDir::go_bad(bool complete) } if (complete) { - if (get_version() == 0) - set_version(1); + if (get_version() == 0) { + auto _fnode = allocate_fnode(); + _fnode->version = 1; + reset_fnode(std::move(_fnode)); + } state_set(STATE_BADFRAG); mark_complete(); @@ -2150,9 +2177,9 @@ void CDir::_omap_commit(int op_prio) SnapRealm *realm = inode->find_snaprealm(); if (!realm->have_past_parents_open()) { dout(10) << " no snap purge, one or more past parents NOT open" << dendl; - } else if (fnode.snap_purged_thru < realm->get_last_destroyed()) { + } else if (fnode->snap_purged_thru < realm->get_last_destroyed()) { snaps = &realm->get_snaps(); - dout(10) << " snap_purged_thru " << fnode.snap_purged_thru + dout(10) << " snap_purged_thru " << fnode->snap_purged_thru << " < " << realm->get_last_destroyed() << ", snap purge based on " << *snaps << dendl; // fnode.snap_purged_thru = realm->get_last_destroyed(); @@ -2258,7 +2285,7 @@ void CDir::_omap_commit(int op_prio) * off last, we cannot get our header into an incorrect state. 
*/ bufferlist header; - encode(fnode, header); + encode(*fnode, header); op.omap_set_header(header); if (!to_set.empty()) @@ -2487,7 +2514,7 @@ void CDir::encode_export(bufferlist& bl) ENCODE_START(1, 1, bl); ceph_assert(!is_projected()); encode(first, bl); - encode(fnode, bl); + encode(*fnode, bl); encode(dirty_old_rstat, bl); encode(committed_version, bl); @@ -2519,9 +2546,14 @@ void CDir::decode_import(bufferlist::const_iterator& blp, LogSegment *ls) { DECODE_START(1, blp); decode(first, blp); - decode(fnode, blp); + { + auto _fnode = allocate_fnode(); + decode(*_fnode, blp); + reset_fnode(std::move(_fnode)); + } + update_projected_version(); + decode(dirty_old_rstat, blp); - projected_version = fnode.version; decode(committed_version, blp); committing_version = committed_version; @@ -2550,11 +2582,11 @@ void CDir::decode_import(bufferlist::const_iterator& blp, LogSegment *ls) // did we import some dirty scatterlock data? if (dirty_old_rstat.size() || - !(fnode.rstat == fnode.accounted_rstat)) { + !(fnode->rstat == fnode->accounted_rstat)) { cache->mds->locker->mark_updated_scatterlock(&inode->nestlock); ls->dirty_dirfrag_nest.push_back(&inode->item_dirty_dirfrag_nest); } - if (!(fnode.fragstat == fnode.accounted_fragstat)) { + if (!(fnode->fragstat == fnode->accounted_fragstat)) { cache->mds->locker->mark_updated_scatterlock(&inode->filelock); ls->dirty_dirfrag_dir.push_back(&inode->item_dirty_dirfrag_dir); } @@ -2819,13 +2851,13 @@ void CDir::verify_fragstat() } } - if (c.nsubdirs != fnode.fragstat.nsubdirs || - c.nfiles != fnode.fragstat.nfiles) { - dout(0) << "verify_fragstat failed " << fnode.fragstat << " on " << *this << dendl; + if (c.nsubdirs != fnode->fragstat.nsubdirs || + c.nfiles != fnode->fragstat.nfiles) { + dout(0) << "verify_fragstat failed " << fnode->fragstat << " on " << *this << dendl; dout(0) << " i count " << c << dendl; ceph_abort(); } else { - dout(0) << "verify_fragstat ok " << fnode.fragstat << " on " << *this << dendl; + dout(0) << 
"verify_fragstat ok " << fnode->fragstat << " on " << *this << dendl; } } #endif @@ -3287,17 +3319,17 @@ void CDir::scrub_info_create() const // break out of const-land to set up implicit initial state CDir *me = const_cast(this); - fnode_t *fn = me->get_projected_fnode(); + const auto& pf = me->get_projected_fnode(); std::unique_ptr si(new scrub_info_t()); si->last_recursive.version = si->recursive_start.version = - fn->recursive_scrub_version; + pf->recursive_scrub_version; si->last_recursive.time = si->recursive_start.time = - fn->recursive_scrub_stamp; + pf->recursive_scrub_stamp; - si->last_local.version = fn->localized_scrub_version; - si->last_local.time = fn->localized_scrub_stamp; + si->last_local.version = pf->localized_scrub_version; + si->last_local.time = pf->localized_scrub_stamp; me->scrub_infop.swap(si); } diff --git a/src/mds/CDir.h b/src/mds/CDir.h index a907836aa51..2c186b55b7b 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -50,6 +50,15 @@ public: typedef mempool::mds_co::map dentry_key_map; typedef mempool::mds_co::set dentry_key_set; + using fnode_ptr = std::shared_ptr; + using fnode_const_ptr = std::shared_ptr; + + template + static fnode_ptr allocate_fnode(Args && ...args) { + static mempool::mds_co::pool_allocator allocator; + return std::allocate_shared(allocator, std::forward(args)...); + } + // -- freezing -- struct freeze_tree_state_t { CDir *dir; // freezing/frozen tree root @@ -231,27 +240,42 @@ public: inode->num_exporting_dirs--; } - version_t get_version() const { return fnode.version; } - void set_version(version_t v) { + version_t get_version() const { return fnode->version; } + void update_projected_version() { ceph_assert(projected_fnode.empty()); - projected_version = fnode.version = v; + projected_version = fnode->version; } version_t get_projected_version() const { return projected_version; } - const fnode_t *get_projected_fnode() const { - if (projected_fnode.empty()) - return &fnode; - else - return 
&projected_fnode.back(); + void reset_fnode(fnode_const_ptr&& ptr) { + fnode = std::move(ptr); + } + + const fnode_const_ptr& get_fnode() const { + return fnode; } - fnode_t *get_projected_fnode() { + // only used for updating newly allocated CDir + fnode_t* _get_fnode() { + if (fnode == empty_fnode) + reset_fnode(allocate_fnode()); + return const_cast(fnode.get()); + } + + const fnode_const_ptr& get_projected_fnode() const { if (projected_fnode.empty()) - return &fnode; + return fnode; else - return &projected_fnode.back(); + return projected_fnode.back(); + } + + // fnode should have already been projected in caller's context + fnode_t* _get_projected_fnode() { + ceph_assert(!projected_fnode.empty()); + return const_cast(projected_fnode.back().get()); } - fnode_t *project_fnode(); + + fnode_ptr project_fnode(); void pop_and_dirty_projected_fnode(LogSegment *ls); bool is_projected() const { return !projected_fnode.empty(); } @@ -263,7 +287,7 @@ public: get(PIN_DIRTY); } } - void mark_dirty(version_t pv, LogSegment *ls); + void mark_dirty(LogSegment *ls, version_t pv=0); void mark_clean(); bool is_new() { return item_new.is_on_list(); } @@ -435,7 +459,7 @@ public: void _encode_base(ceph::buffer::list& bl) { ENCODE_START(1, 1, bl); encode(first, bl); - encode(fnode, bl); + encode(*fnode, bl); encode(dir_rep, bl); encode(dir_rep_by, bl); ENCODE_FINISH(bl); @@ -443,7 +467,11 @@ public: void _decode_base(ceph::buffer::list::const_iterator& p) { DECODE_START(1, p); decode(first, p); - decode(fnode, p); + { + auto _fnode = allocate_fnode(); + decode(*_fnode, p); + reset_fnode(std::move(_fnode)); + } decode(dir_rep, p); decode(dir_rep_by, p); DECODE_FINISH(p); @@ -604,7 +632,6 @@ public: CInode *inode; // my inode frag_t frag; // my frag - fnode_t fnode; snapid_t first = 2; mempool::mds_co::compact_map dirty_old_rstat; // [value.first,key] @@ -668,8 +695,14 @@ protected: void _encode_dentry(CDentry *dn, ceph::buffer::list& bl, const std::set *snaps); void _committed(int 
r, version_t v); + static fnode_const_ptr empty_fnode; + // fnode is a pointer to constant fnode_t, the constant fnode_t can be shared + // by CDir and log events. To update fnode, read-copy-update should be used. + + fnode_const_ptr fnode = empty_fnode; + version_t projected_version = 0; - mempool::mds_co::list projected_fnode; + mempool::mds_co::list projected_fnode; std::unique_ptr scrub_infop; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 72d72023811..74599fac9c1 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1806,7 +1806,7 @@ void CInode::encode_lock_ifile(bufferlist& bl) frag_t fg = p.first; CDir *dir = p.second; if (is_auth() || dir->is_auth()) { - fnode_t *pf = dir->get_projected_fnode(); + const auto& pf = dir->get_projected_fnode(); dout(15) << fg << " " << *dir << dendl; dout(20) << fg << " fragstat " << pf->fragstat << dendl; dout(20) << fg << " accounted_fragstat " << pf->accounted_fragstat << dendl; @@ -1882,8 +1882,10 @@ void CInode::decode_lock_ifile(bufferlist::const_iterator& p) dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; dir->first = fgfirst; - dir->fnode.fragstat = fragstat; - dir->fnode.accounted_fragstat = accounted_fragstat; + auto _fnode = CDir::allocate_fnode(*dir->get_fnode()); + _fnode->fragstat = fragstat; + _fnode->accounted_fragstat = accounted_fragstat; + dir->reset_fnode(std::move(_fnode)); if (!(fragstat == accounted_fragstat)) { dout(10) << fg << " setting filelock updated flag" << dendl; filelock.mark_dirty(); // ok bc we're auth and caller will handle @@ -1893,7 +1895,7 @@ void CInode::decode_lock_ifile(bufferlist::const_iterator& p) dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; dir->first = fgfirst; - fnode_t *pf = dir->get_projected_fnode(); + const auto& pf = dir->get_projected_fnode(); finish_scatter_update(&filelock, dir, _inode->dirstat.version, pf->accounted_fragstat.version); } @@ -1923,7 +1925,7 @@ void 
CInode::encode_lock_inest(bufferlist& bl) frag_t fg = p.first; CDir *dir = p.second; if (is_auth() || dir->is_auth()) { - fnode_t *pf = dir->get_projected_fnode(); + const auto& pf = dir->get_projected_fnode(); dout(10) << __func__ << " " << fg << " dir " << *dir << dendl; dout(10) << __func__ << " " << fg << " rstat " << pf->rstat << dendl; dout(10) << __func__ << " " << fg << " accounted_rstat " << pf->rstat << dendl; @@ -1986,8 +1988,10 @@ void CInode::decode_lock_inest(bufferlist::const_iterator& p) dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; dir->first = fgfirst; - dir->fnode.rstat = rstat; - dir->fnode.accounted_rstat = accounted_rstat; + auto _fnode = CDir::allocate_fnode(*dir->get_fnode()); + _fnode->rstat = rstat; + _fnode->accounted_rstat = accounted_rstat; + dir->reset_fnode(std::move(_fnode)); dir->dirty_old_rstat.swap(dirty_old_rstat); if (!(rstat == accounted_rstat) || !dir->dirty_old_rstat.empty()) { dout(10) << fg << " setting nestlock updated flag" << dendl; @@ -1998,7 +2002,7 @@ void CInode::decode_lock_inest(bufferlist::const_iterator& p) dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; dir->first = fgfirst; - fnode_t *pf = dir->get_projected_fnode(); + const auto& pf = dir->get_projected_fnode(); finish_scatter_update(&nestlock, dir, _inode->rstat.version, pf->accounted_rstat.version); } @@ -2288,7 +2292,7 @@ void CInode::start_scatter(ScatterLock *lock) for (const auto &p : dirfrags) { frag_t fg = p.first; CDir *dir = p.second; - fnode_t *pf = dir->get_projected_fnode(); + const auto& pf = dir->get_projected_fnode(); dout(20) << fg << " " << *dir << dendl; if (!dir->is_auth()) @@ -2343,7 +2347,7 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, MutationRef mut(new MutationImpl()); mut->ls = mdlog->get_current_segment(); - fnode_t *pf = dir->project_fnode(); + auto pf = dir->project_fnode(); std::string_view ename; switch (lock->get_type()) 
{ @@ -2368,9 +2372,6 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, ceph_abort(); } - pf->version = dir->pre_dirty(); - mut->add_projected_fnode(dir); - EUpdate *le = new EUpdate(mdlog, ename); mdlog->start_entry(le); le->metablob.add_dir_context(dir); @@ -2394,6 +2395,9 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, mut->ls->dirty_dirfrag_nest.push_back(&item_dirty_dirfrag_nest); } } + + pf->version = dir->pre_dirty(); + mut->add_projected_fnode(dir); mdlog->submit_entry(le, new C_Inode_FragUpdate(this, dir, mut)); } else { @@ -2464,9 +2468,11 @@ void CInode::finish_scatter_gather_update(int type) dirstat_valid = false; } - fnode_t *pf = dir->get_projected_fnode(); + CDir::fnode_const_ptr pf; if (update) pf = dir->project_fnode(); + else + pf = dir->get_projected_fnode(); if (pf->accounted_fragstat.version == pi->dirstat.version - 1) { dout(20) << fg << " fragstat " << pf->fragstat << dendl; @@ -2479,18 +2485,20 @@ void CInode::finish_scatter_gather_update(int type) if (pf->fragstat.nfiles < 0 || pf->fragstat.nsubdirs < 0) { clog->error() << "bad/negative dir size on " - << dir->dirfrag() << " " << pf->fragstat; + << dir->dirfrag() << " " << pf->fragstat; ceph_assert(!"bad/negative fragstat" == g_conf()->mds_verify_scatter); - + + auto _pf = const_cast(pf.get()); if (pf->fragstat.nfiles < 0) - pf->fragstat.nfiles = 0; + _pf->fragstat.nfiles = 0; if (pf->fragstat.nsubdirs < 0) - pf->fragstat.nsubdirs = 0; + _pf->fragstat.nsubdirs = 0; } if (update) { - pf->accounted_fragstat = pf->fragstat; - pf->fragstat.version = pf->accounted_fragstat.version = pi->dirstat.version; + auto _pf = const_cast(pf.get()); + _pf->accounted_fragstat = _pf->fragstat; + _pf->fragstat.version = _pf->accounted_fragstat.version = pi->dirstat.version; dout(10) << fg << " updated accounted_fragstat " << pf->fragstat << " on " << *dir << dendl; } @@ -2531,8 +2539,7 @@ void CInode::finish_scatter_gather_update(int type) } } - if (pi->dirstat.nfiles < 0 
|| pi->dirstat.nsubdirs < 0) - { + if (pi->dirstat.nfiles < 0 || pi->dirstat.nsubdirs < 0) { std::string path; make_path_string(path); clog->error() << "Inconsistent statistics detected: fragstat on inode " @@ -2576,9 +2583,11 @@ void CInode::finish_scatter_gather_update(int type) rstat_valid = false; } - fnode_t *pf = dir->get_projected_fnode(); + CDir::fnode_const_ptr pf; if (update) pf = dir->project_fnode(); + else + pf = dir->get_projected_fnode(); if (pf->accounted_rstat.version == pi->rstat.version-1) { // only pull this frag's dirty rstat inodes into the frag if @@ -2602,9 +2611,10 @@ void CInode::finish_scatter_gather_update(int type) dout(20) << fg << " skipping STALE accounted_rstat " << pf->accounted_rstat << dendl; } if (update) { - pf->accounted_rstat = pf->rstat; + auto _pf = const_cast(pf.get()); + _pf->accounted_rstat = pf->rstat; dir->dirty_old_rstat.clear(); - pf->rstat.version = pf->accounted_rstat.version = pi->rstat.version; + _pf->rstat.version = _pf->accounted_rstat.version = pi->rstat.version; dir->check_rstats(); dout(10) << fg << " updated accounted_rstat " << pf->rstat << " on " << *dir << dendl; } @@ -2666,16 +2676,16 @@ void CInode::finish_scatter_gather_update_accounted(int type, MutationRef& mut, if (type == CEPH_LOCK_IDFT) continue; // nothing to do. 
+ if (type == CEPH_LOCK_INEST) + dir->assimilate_dirty_rstat_inodes_finish(mut, metablob); + dout(10) << " journaling updated frag accounted_ on " << *dir << dendl; ceph_assert(dir->is_projected()); - fnode_t *pf = dir->get_projected_fnode(); + auto pf = dir->_get_projected_fnode(); pf->version = dir->pre_dirty(); mut->add_projected_fnode(dir); metablob->add_dir(dir, true); mut->auth_pin(dir); - - if (type == CEPH_LOCK_INEST) - dir->assimilate_dirty_rstat_inodes_finish(mut, metablob); } } @@ -4331,10 +4341,10 @@ void CInode::encode_export(bufferlist& bl) CDir *dir = p.second; if (dir->state_test(CDir::STATE_EXPORTBOUND)) { encode(p.first, bounding); - encode(dir->fnode.fragstat, bounding); - encode(dir->fnode.accounted_fragstat, bounding); - encode(dir->fnode.rstat, bounding); - encode(dir->fnode.accounted_rstat, bounding); + encode(dir->get_fnode()->fragstat, bounding); + encode(dir->get_fnode()->accounted_fragstat, bounding); + encode(dir->get_fnode()->rstat, bounding); + encode(dir->get_fnode()->accounted_rstat, bounding); dout(10) << " encoded fragstat/rstat info for " << *dir << dendl; } } @@ -4421,6 +4431,7 @@ void CInode::decode_import(bufferlist::const_iterator& p, // check because the dirfrag is under migration? That implies // it is frozen (and in a SYNC or LOCK state). FIXME. 
+ auto _fnode = CDir::allocate_fnode(*dir->get_fnode()); if (dir->is_auth() || filelock.get_state() == LOCK_MIX) { dout(10) << " skipped fragstat info for " << *dir << dendl; @@ -4428,8 +4439,8 @@ void CInode::decode_import(bufferlist::const_iterator& p, decode(f, q); decode(f, q); } else { - decode(dir->fnode.fragstat, q); - decode(dir->fnode.accounted_fragstat, q); + decode(_fnode->fragstat, q); + decode(_fnode->accounted_fragstat, q); dout(10) << " took fragstat info for " << *dir << dendl; } if (dir->is_auth() || @@ -4439,10 +4450,11 @@ void CInode::decode_import(bufferlist::const_iterator& p, decode(n, q); decode(n, q); } else { - decode(dir->fnode.rstat, q); - decode(dir->fnode.accounted_rstat, q); + decode(_fnode->rstat, q); + decode(_fnode->accounted_rstat, q); dout(10) << " took rstat info for " << *dir << dendl; } + dir->reset_fnode(std::move(_fnode)); } _decode_locks_full(p); @@ -4855,8 +4867,8 @@ next: for (const auto &p : in->dirfrags) { CDir *dir = p.second; ceph_assert(dir->get_version() > 0); - nest_info.add(dir->fnode.accounted_rstat); - dir_info.add(dir->fnode.accounted_fragstat); + nest_info.add(dir->get_fnode()->accounted_rstat); + dir_info.add(dir->get_fnode()->accounted_fragstat); if (dir->scrub_infop->pending_scrub_error) { dir->scrub_infop->pending_scrub_error = false; if (dir->scrub_infop->header->get_repair()) { diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 9bd7df5eb74..4bf10e968af 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2830,15 +2830,13 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, le = eu; } mds->mdlog->start_entry(le); - if (update_size) { // FIXME if/when we do max_size nested accounting - mdcache->predirty_journal_parents(mut, metablob, in, 0, PREDIRTY_PRIMARY); - // no cow, here! 
- CDentry *parent = in->get_projected_parent_dn(); - metablob->add_primary_dentry(parent, in, true); - } else { - metablob->add_dir_context(in->get_projected_parent_dn()->get_dir()); - mdcache->journal_dirty_inode(mut.get(), metablob, in); - } + + mdcache->predirty_journal_parents(mut, metablob, in, 0, PREDIRTY_PRIMARY); + // no cow, here! + CDentry *parent = in->get_projected_parent_dn(); + metablob->add_primary_dentry(parent, in, true); + mdcache->journal_dirty_inode(mut.get(), metablob, in); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, UPDATE_SHAREMAX, ref_t())); wrlock_force(&in->filelock, mut); // wrlock for duration of journal diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index f06d585266d..ede52ede742 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -480,15 +480,16 @@ void MDCache::create_empty_hierarchy(MDSGather *gather) adjust_subtree_auth(rootdir, mds->get_nodeid()); rootdir->dir_rep = CDir::REP_ALL; //NONE; - ceph_assert(rootdir->fnode.accounted_fragstat == rootdir->fnode.fragstat); - ceph_assert(rootdir->fnode.fragstat == root->get_inode()->dirstat); - ceph_assert(rootdir->fnode.accounted_rstat == rootdir->fnode.rstat); + ceph_assert(rootdir->get_fnode()->accounted_fragstat == rootdir->get_fnode()->fragstat); + ceph_assert(rootdir->get_fnode()->fragstat == root->get_inode()->dirstat); + ceph_assert(rootdir->get_fnode()->accounted_rstat == rootdir->get_fnode()->rstat); /* Do no update rootdir rstat information of the fragment, rstat upkeep magic * assume version 0 is stale/invalid. 
*/ rootdir->mark_complete(); - rootdir->mark_dirty(rootdir->pre_dirty(), mds->mdlog->get_current_segment()); + rootdir->_get_fnode()->version = rootdir->pre_dirty(); + rootdir->mark_dirty(mds->mdlog->get_current_segment()); rootdir->commit(0, gather->new_sub()); root->store(gather->new_sub()); @@ -500,6 +501,8 @@ void MDCache::create_mydir_hierarchy(MDSGather *gather) CInode *my = create_system_inode(MDS_INO_MDSDIR(mds->get_nodeid()), S_IFDIR); CDir *mydir = my->get_or_open_dirfrag(this, frag_t()); + auto mydir_fnode = mydir->_get_fnode(); + adjust_subtree_auth(mydir, mds->get_nodeid()); LogSegment *ls = mds->mdlog->get_current_segment(); @@ -513,29 +516,31 @@ void MDCache::create_mydir_hierarchy(MDSGather *gather) CDentry *sdn = mydir->add_primary_dentry(name.str(), stray); sdn->_mark_dirty(mds->mdlog->get_current_segment()); - stray->_get_inode()->dirstat = straydir->fnode.fragstat; + stray->_get_inode()->dirstat = straydir->get_fnode()->fragstat; - mydir->fnode.rstat.add(stray->get_inode()->rstat); - mydir->fnode.fragstat.nsubdirs++; + mydir_fnode->rstat.add(stray->get_inode()->rstat); + mydir_fnode->fragstat.nsubdirs++; // save them straydir->mark_complete(); - straydir->mark_dirty(straydir->pre_dirty(), ls); + straydir->_get_fnode()->version = straydir->pre_dirty(); + straydir->mark_dirty(ls); straydir->commit(0, gather->new_sub()); stray->mark_dirty_parent(ls, true); stray->store_backtrace(gather->new_sub()); } - mydir->fnode.accounted_fragstat = mydir->fnode.fragstat; - mydir->fnode.accounted_rstat = mydir->fnode.rstat; + mydir_fnode->accounted_fragstat = mydir->get_fnode()->fragstat; + mydir_fnode->accounted_rstat = mydir->get_fnode()->rstat; auto inode = myin->_get_inode(); - inode->dirstat = mydir->fnode.fragstat; - inode->rstat = mydir->fnode.rstat; + inode->dirstat = mydir->get_fnode()->fragstat; + inode->rstat = mydir->get_fnode()->rstat; ++inode->rstat.rsubdirs; inode->accounted_rstat = inode->rstat; mydir->mark_complete(); - 
mydir->mark_dirty(mydir->pre_dirty(), ls); + mydir_fnode->version = mydir->pre_dirty(); + mydir->mark_dirty(ls); mydir->commit(0, gather->new_sub()); myin->store(gather->new_sub()); @@ -568,9 +573,11 @@ void MDCache::_create_system_file(CDir *dir, std::string_view name, CInode *in, mdir = in->get_or_open_dirfrag(this, frag_t()); mdir->mark_complete(); - mdir->pre_dirty(); - } else + mdir->_get_fnode()->version = mdir->pre_dirty(); + } else { inode->rstat.rfiles = 1; + } + inode->version = dn->pre_dirty(); SnapRealm *realm = dir->get_inode()->find_snaprealm(); @@ -616,7 +623,7 @@ void MDCache::_create_system_file_finish(MutationRef& mut, CDentry *dn, version_ if (in->is_dir()) { CDir *dir = in->get_dirfrag(frag_t()); ceph_assert(dir); - dir->mark_dirty(1, mut->ls); + dir->mark_dirty(mut->ls); dir->mark_new(mut->ls); } @@ -789,7 +796,8 @@ void MDCache::populate_mydir() LogSegment *ls = mds->mdlog->get_current_segment(); mydir->state_clear(CDir::STATE_BADFRAG); mydir->mark_complete(); - mydir->mark_dirty(mydir->pre_dirty(), ls); + mydir->_get_fnode()->version = mydir->pre_dirty(); + mydir->mark_dirty(ls); } // open or create stray @@ -1687,10 +1695,12 @@ void MDCache::journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, snapid_t oldfirst = dn->first; dn->first = dir_follows+1; if (realm->has_snaps_in_range(oldfirst, dir_follows)) { - CDentry *olddn = dn->dir->add_remote_dentry(dn->get_name(), in->ino(), in->d_type(), - oldfirst, dir_follows); - olddn->pre_dirty(); + CDir *dir = dn->dir; + CDentry *olddn = dir->add_remote_dentry(dn->get_name(), in->ino(), in->d_type(), + oldfirst, dir_follows); dout(10) << " olddn " << *olddn << dendl; + ceph_assert(dir->is_projected()); + olddn->set_projected_version(dir->get_projected_version()); metablob->add_remote_dentry(olddn, true); mut->add_cow_dentry(olddn); // FIXME: adjust link count here? hmm. 
@@ -1752,43 +1762,38 @@ void MDCache::journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, } dout(10) << " dn " << *dn << dendl; + CDir *dir = dn->get_dir(); + ceph_assert(dir->is_projected()); + if (in) { CInode *oldin = cow_inode(in, follows); + ceph_assert(in->is_projected()); mut->add_cow_inode(oldin); if (pcow_inode) *pcow_inode = oldin; - CDentry *olddn = dn->dir->add_primary_dentry(dn->get_name(), oldin, oldfirst, follows); - oldin->_get_inode()->version = olddn->pre_dirty(); + CDentry *olddn = dir->add_primary_dentry(dn->get_name(), oldin, oldfirst, follows); dout(10) << " olddn " << *olddn << dendl; bool need_snapflush = !oldin->client_snap_caps.empty(); if (need_snapflush) { mut->ls->open_files.push_back(&oldin->item_open_file); mds->locker->mark_need_snapflush_inode(oldin); } + olddn->set_projected_version(dir->get_projected_version()); metablob->add_primary_dentry(olddn, 0, true, false, false, need_snapflush); mut->add_cow_dentry(olddn); } else { ceph_assert(dnl->is_remote()); - CDentry *olddn = dn->dir->add_remote_dentry(dn->get_name(), dnl->get_remote_ino(), dnl->get_remote_d_type(), + CDentry *olddn = dir->add_remote_dentry(dn->get_name(), dnl->get_remote_ino(), dnl->get_remote_d_type(), oldfirst, follows); - olddn->pre_dirty(); dout(10) << " olddn " << *olddn << dendl; + + olddn->set_projected_version(dir->get_projected_version()); metablob->add_remote_dentry(olddn, true); mut->add_cow_dentry(olddn); } } } - -void MDCache::journal_cow_inode(MutationRef& mut, EMetaBlob *metablob, - CInode *in, snapid_t follows, - CInode **pcow_inode) -{ - dout(10) << "journal_cow_inode follows " << follows << " on " << *in << dendl; - CDentry *dn = in->get_projected_parent_dn(); - journal_cow_dentry(mut.get(), metablob, dn, follows, pcow_inode); -} - void MDCache::journal_dirty_inode(MutationImpl *mut, EMetaBlob *metablob, CInode *in, snapid_t follows) { if (in->is_base()) { @@ -1913,7 +1918,7 @@ void MDCache::_project_rstat_inode_to_frag(const 
CInode::mempool_inode* inode, s */ nest_info_t *prstat; snapid_t first; - fnode_t *pf = parent->get_projected_fnode(); + auto pf = parent->_get_projected_fnode(); if (last == CEPH_NOSNAP) { if (g_conf()->mds_snap_rstat) first = std::max(ofirst, parent->first); @@ -2005,7 +2010,8 @@ void MDCache::_project_rstat_inode_to_frag(const CInode::mempool_inode* inode, s } } -void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat, +void MDCache::project_rstat_frag_to_inode(const nest_info_t& rstat, + const nest_info_t& accounted_rstat, snapid_t ofirst, snapid_t last, CInode *pin, bool cow_head) { @@ -2228,7 +2234,7 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, mut->auth_pin(parent); mut->add_projected_fnode(parent); - fnode_t *pf = parent->project_fnode(); + auto pf = parent->project_fnode(); pf->version = parent->pre_dirty(); if (do_parent_mtime || linkunlink) { @@ -6585,12 +6591,10 @@ void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) EUpdate *le = new EUpdate(mds->mdlog, "truncate finish"); mds->mdlog->start_entry(le); - CDentry *dn = in->get_projected_parent_dn(); - le->metablob.add_dir_context(dn->get_dir()); - le->metablob.add_primary_dentry(dn, in, true); - le->metablob.add_truncate_finish(in->ino(), ls->seq); + predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY); journal_dirty_inode(mut.get(), &le->metablob, in); + le->metablob.add_truncate_finish(in->ino(), ls->seq); mds->mdlog->submit_entry(le, new C_MDC_TruncateLogged(this, in, mut)); // flush immediately if there are readers/writers waiting @@ -11980,6 +11984,7 @@ void MDCache::dispatch_fragment_dir(MDRequestRef& mdr) // journal dirfragtree auto pi = diri->project_inode(); pi.inode->version = diri->pre_dirty(); + predirty_journal_parents(mdr, &le->metablob, diri, 0, PREDIRTY_PRIMARY); journal_dirty_inode(mdr.get(), &le->metablob, diri); } else { mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock); @@ 
-12340,6 +12345,17 @@ void MDCache::wait_for_uncommitted_fragments(MDSGather *gather) p.second.waiters.push_back(gather->new_sub()); } +struct C_MDC_FragmentRollback : public MDCacheLogContext { + MutationRef mut; + C_MDC_FragmentRollback(MDCache *c, MutationRef& m) : + MDCacheLogContext(c), mut(m) {} + void finish(int r) override { + mut->apply(); + get_mds()->locker->drop_locks(mut.get()); + mut->cleanup(); + } +}; + void MDCache::rollback_uncommitted_fragments() { dout(10) << "rollback_uncommitted_fragments: " << uncommitted_fragments.size() << " pending" << dendl; @@ -12357,7 +12373,8 @@ void MDCache::rollback_uncommitted_fragments() dout(10) << " rolling back " << p->first << " refragment by " << uf.bits << " bits" << dendl; - LogSegment *ls = mds->mdlog->get_current_segment(); + MutationRef mut(new MutationImpl()); + mut->ls = mds->mdlog->get_current_segment(); EFragment *le = new EFragment(mds->mdlog, EFragment::OP_ROLLBACK, p->first, uf.bits); mds->mdlog->start_entry(le); bool diri_auth = (diri->authority() != CDIR_AUTH_UNDEF); @@ -12379,20 +12396,21 @@ void MDCache::rollback_uncommitted_fragments() dirfrag_rollback rollback; decode(rollback, bp); - dir->set_version(rollback.fnode.version); dir->fnode = rollback.fnode; - dir->_mark_dirty(ls); + dir->mark_dirty(mut->ls); - if (!(dir->fnode.rstat == dir->fnode.accounted_rstat)) { + if (!(dir->get_fnode()->rstat == dir->get_fnode()->accounted_rstat)) { dout(10) << " dirty nestinfo on " << *dir << dendl; - mds->locker->mark_updated_scatterlock(&dir->inode->nestlock); - ls->dirty_dirfrag_nest.push_back(&dir->inode->item_dirty_dirfrag_nest); + mds->locker->mark_updated_scatterlock(&diri->nestlock); + mut->ls->dirty_dirfrag_nest.push_back(&diri->item_dirty_dirfrag_nest); + mut->add_updated_lock(&diri->nestlock); } - if (!(dir->fnode.fragstat == dir->fnode.accounted_fragstat)) { + if (!(dir->get_fnode()->fragstat == dir->get_fnode()->accounted_fragstat)) { dout(10) << " dirty fragstat on " << *dir << dendl; - 
mds->locker->mark_updated_scatterlock(&dir->inode->filelock); - ls->dirty_dirfrag_dir.push_back(&dir->inode->item_dirty_dirfrag_dir); + mds->locker->mark_updated_scatterlock(&diri->filelock); + mut->ls->dirty_dirfrag_dir.push_back(&diri->item_dirty_dirfrag_dir); + mut->add_updated_lock(&diri->filelock); } le->add_orig_frag(dir->get_frag()); @@ -12409,12 +12427,14 @@ void MDCache::rollback_uncommitted_fragments() if (diri_auth) { auto pi = diri->project_inode(); + mut->add_projected_inode(diri); pi.inode->version = diri->pre_dirty(); - diri->pop_and_dirty_projected_inode(ls); // hacky + predirty_journal_parents(mut, &le->metablob, diri, 0, PREDIRTY_PRIMARY); le->metablob.add_primary_dentry(diri->get_projected_parent_dn(), diri, true); } else { mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock); - ls->dirty_dirfrag_dirfragtree.push_back(&diri->item_dirty_dirfrag_dirfragtree); + mut->ls->dirty_dirfrag_dirfragtree.push_back(&diri->item_dirty_dirfrag_dirfragtree); + mut->add_updated_lock(&diri->dirfragtreelock); } if (g_conf()->mds_debug_frag) @@ -12424,7 +12444,7 @@ void MDCache::rollback_uncommitted_fragments() ceph_assert(!diri->dirfragtree.is_leaf(leaf)); } - mds->mdlog->submit_entry(le); + mds->mdlog->submit_entry(le, new C_MDC_FragmentRollback(this, mut)); uf.old_frags.swap(old_frags); _fragment_committed(p->first, MDRequestRef()); @@ -13046,7 +13066,7 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) frag_info.nfiles++; } - fnode_t *pf = dir->get_projected_fnode(); + auto pf = dir->get_projected_fnode(); bool good_fragstat = frag_info.same_sums(pf->fragstat); bool good_rstat = nest_info.same_sums(pf->rstat); if (good_fragstat && good_rstat) { @@ -13055,8 +13075,9 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) return; } - pf = dir->project_fnode(); - pf->version = dir->pre_dirty(); + auto _pf = dir->project_fnode(); + _pf->version = dir->pre_dirty(); + pf = _pf; mdr->add_projected_fnode(dir); mdr->ls = 
mds->mdlog->get_current_segment(); @@ -13068,7 +13089,7 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) frag_info.mtime = pf->fragstat.mtime; if (pf->fragstat.change_attr > frag_info.change_attr) frag_info.change_attr = pf->fragstat.change_attr; - pf->fragstat = frag_info; + _pf->fragstat = frag_info; mds->locker->mark_updated_scatterlock(&diri->filelock); mdr->ls->dirty_dirfrag_dir.push_back(&diri->item_dirty_dirfrag_dir); mdr->add_updated_lock(&diri->filelock); @@ -13077,7 +13098,7 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) if (!good_rstat) { if (pf->rstat.rctime > nest_info.rctime) nest_info.rctime = pf->rstat.rctime; - pf->rstat = nest_info; + _pf->rstat = nest_info; mds->locker->mark_updated_scatterlock(&diri->nestlock); mdr->ls->dirty_dirfrag_nest.push_back(&diri->item_dirty_dirfrag_nest); mdr->add_updated_lock(&diri->nestlock); @@ -13175,8 +13196,8 @@ do_rdlocks: CDir *dir = diri->get_dirfrag(leaf); ceph_assert(dir); ceph_assert(dir->get_version() > 0); - dir_info.add(dir->fnode.accounted_fragstat); - nest_info.add(dir->fnode.accounted_rstat); + dir_info.add(dir->get_fnode()->accounted_fragstat); + nest_info.add(dir->get_fnode()->accounted_rstat); } } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index e6debb30e01..1f1c7461383 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -407,17 +407,14 @@ class MDCache { void journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, CDentry *dn, snapid_t follows=CEPH_NOSNAP, CInode **pcow_inode=0, CDentry::linkage_t *dnl=0); - void journal_cow_inode(MutationRef& mut, EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP, - CInode **pcow_inode=0); void journal_dirty_inode(MutationImpl *mut, EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP); void project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first, int linkunlink, SnapRealm *prealm); void _project_rstat_inode_to_frag(const CInode::mempool_inode* inode, snapid_t ofirst, snapid_t last, 
CDir *parent, int linkunlink, bool update_inode); - void project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat, - snapid_t ofirst, snapid_t last, - CInode *pin, bool cow_head); + void project_rstat_frag_to_inode(const nest_info_t& rstat, const nest_info_t& accounted_rstat, + snapid_t ofirst, snapid_t last, CInode *pin, bool cow_head); void broadcast_quota_to_client(CInode *in, client_t exclude_ct = -1, bool quota_change = false); void predirty_journal_parents(MutationRef mut, EMetaBlob *blob, CInode *in, CDir *parent, @@ -1256,6 +1253,7 @@ class MDCache { friend class C_MDC_FragmentPrep; friend class C_MDC_FragmentStore; friend class C_MDC_FragmentCommit; + friend class C_MDC_FragmentRollback; friend class C_IO_MDC_FragmentPurgeOld; // -- subtrees -- diff --git a/src/mds/Mutation.cc b/src/mds/Mutation.cc index 5359e0a91ce..a5d7b0be057 100644 --- a/src/mds/Mutation.cc +++ b/src/mds/Mutation.cc @@ -262,15 +262,15 @@ void MutationImpl::add_cow_dentry(CDentry *dn) void MutationImpl::apply() { pop_and_dirty_projected_inodes(); - pop_and_dirty_projected_fnodes(); - for (const auto& in : dirty_cow_inodes) { + for (const auto& in : dirty_cow_inodes) in->_mark_dirty(ls); - } - for (const auto& [dentry, v] : dirty_cow_dentries) { - dentry->mark_dirty(v, ls); - } - + + for (const auto& [dn, v] : dirty_cow_dentries) + dn->mark_dirty(v, ls); + + pop_and_dirty_projected_fnodes(); + for (const auto& lock : updated_locks) { lock->mark_dirty(); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 29cd9f36782..2413b4cd786 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -5964,8 +5964,7 @@ public: if (newi->is_dir()) { CDir *dir = newi->get_dirfrag(frag_t()); ceph_assert(dir); - dir->fnode.version--; - dir->mark_dirty(dir->fnode.version + 1, mdr->ls); + dir->mark_dirty(mdr->ls); dir->mark_new(mdr->ls); } @@ -6130,7 +6129,7 @@ void Server::handle_client_mkdir(MDRequestRef& mdr) CDir *newdir = newi->get_or_open_dirfrag(mdcache, frag_t()); 
newdir->state_set(CDir::STATE_CREATING); newdir->mark_complete(); - newdir->fnode.version = newdir->pre_dirty(); + newdir->_get_fnode()->version = newdir->pre_dirty(); // prepare finisher mdr->ls = mdlog->get_current_segment(); @@ -6648,7 +6647,7 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) rollback.reqid = mdr->reqid; rollback.ino = targeti->ino(); rollback.old_ctime = targeti->get_inode()->ctime; // we hold versionlock xlock; no concorrent projections - const fnode_t *pf = targeti->get_parent_dn()->get_dir()->get_projected_fnode(); + const auto& pf = targeti->get_parent_dn()->get_dir()->get_projected_fnode(); rollback.old_dir_mtime = pf->fragstat.mtime; rollback.old_dir_rctime = pf->rstat.rctime; rollback.was_inc = inc; @@ -6801,7 +6800,7 @@ void Server::do_link_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& // parent dir rctime CDir *parent = in->get_projected_parent_dn()->get_dir(); - fnode_t *pf = parent->project_fnode(); + auto pf = parent->project_fnode(); mut->add_projected_fnode(parent); pf->version = parent->pre_dirty(); if (pf->fragstat.mtime == pi.inode->ctime) { @@ -7603,7 +7602,7 @@ bool Server::_dir_is_nonempty(MDRequestRef& mdr, CInode *in) auto&& ls = in->get_dirfrags(); for (const auto& dir : ls) { - const fnode_t *pf = dir->get_projected_fnode(); + const auto& pf = dir->get_projected_fnode(); if (pf->fragstat.size()) { dout(10) << "dir_is_nonempty dirstat has " << pf->fragstat.size() << " items " << *dir << dendl; @@ -8406,6 +8405,12 @@ void Server::_rename_prepare(MDRequestRef& mdr, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); } } + + if (srcdnl->is_remote() && srci->is_auth()) { + CDir *srci_dir = srci->get_projected_parent_dir(); + if (srci_dir != srcdn->get_dir() && srci_dir != destdn->get_dir()) + mdcache->predirty_journal_parents(mdr, metablob, srci, srci_dir, PREDIRTY_PRIMARY); + } // move srcdn int predirty_primary = (srcdnl->is_primary() && srcdn->get_dir() != destdn->get_dir()) ? 
PREDIRTY_PRIMARY:0; @@ -9213,24 +9218,24 @@ void Server::_commit_peer_rename(MDRequestRef& mdr, int r, mdcache->shutdown_export_stray_finish(migrated_stray); } -void _rollback_repair_dir(MutationRef& mut, CDir *dir, rename_rollback::drec &r, utime_t ctime, - bool isdir, int linkunlink, nest_info_t &rstat) +static void _rollback_repair_dir(MutationRef& mut, CDir *dir, + rename_rollback::drec &r, utime_t ctime, + bool isdir, const nest_info_t &rstat) { - fnode_t *pf; - pf = dir->project_fnode(); + auto pf = dir->project_fnode(); mut->add_projected_fnode(dir); pf->version = dir->pre_dirty(); if (isdir) { - pf->fragstat.nsubdirs += linkunlink; + pf->fragstat.nsubdirs += 1; } else { - pf->fragstat.nfiles += linkunlink; + pf->fragstat.nfiles += 1; } if (r.ino) { - pf->rstat.rbytes += linkunlink * rstat.rbytes; - pf->rstat.rfiles += linkunlink * rstat.rfiles; - pf->rstat.rsubdirs += linkunlink * rstat.rsubdirs; - pf->rstat.rsnaps += linkunlink * rstat.rsnaps; + pf->rstat.rbytes += rstat.rbytes; + pf->rstat.rfiles += rstat.rfiles; + pf->rstat.rsubdirs += rstat.rsubdirs; + pf->rstat.rsnaps += rstat.rsnaps; } if (pf->fragstat.mtime == ctime) { pf->fragstat.mtime = r.dirfrag_old_mtime; @@ -9366,21 +9371,31 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef map> splits[2]; - CInode::mempool_inode *pip = nullptr; + const CInode::mempool_inode *pip = nullptr; if (in) { bool projected; - if (in->get_projected_parent_dn()->authority().first == whoami) { + CDir *pdir = in->get_projected_parent_dir(); + if (pdir->authority().first == whoami) { auto pi = in->project_inode(); - pip = pi.inode.get(); mut->add_projected_inode(in); - pip->version = in->pre_dirty(); + pi.inode->version = in->pre_dirty(); + if (pdir != srcdir) { + auto pf = pdir->project_fnode(); + mut->add_projected_fnode(pdir); + pf->version = pdir->pre_dirty(); + } + if (pi.inode->ctime == rollback.ctime) + pi.inode->ctime = rollback.orig_src.old_ctime; projected = true; } else { - // 
FIXME: pip = in->get_projected_inode(); + if (in->get_inode()->ctime == rollback.ctime) { + auto _inode = CInode::allocate_inode(*in->get_inode()); + _inode->ctime = rollback.orig_src.old_ctime; + in->reset_inode(_inode); + } projected = false; } - if (pip->ctime == rollback.ctime) - pip->ctime = rollback.orig_src.old_ctime; + pip = in->get_projected_inode().get(); if (rollback.srci_snapbl.length() && in->snaprealm) { bool hadrealm; @@ -9411,12 +9426,6 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef } } - if (srcdn && srcdn->authority().first == whoami) { - nest_info_t blah; - _rollback_repair_dir(mut, srcdir, rollback.orig_src, rollback.ctime, - in ? in->is_dir() : false, 1, pip ? pip->accounted_rstat : blah); - } - // repair dest if (destdn) { if (rollback.orig_dest.ino && target) { @@ -9437,17 +9446,24 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef if (target) { bool projected; - CInode::mempool_inode *ti = nullptr; - if (target->get_projected_parent_dn()->authority().first == whoami) { + CInode::inode_ptr ti; + CDir *pdir = target->get_projected_parent_dir(); + if (pdir->authority().first == whoami) { auto pi = target->project_inode(); - ti = pi.inode.get(); mut->add_projected_inode(target); - ti->version = target->pre_dirty(); + pi.inode->version = target->pre_dirty(); + if (pdir != srcdir) { + auto pf = pdir->project_fnode(); + mut->add_projected_fnode(pdir); + pf->version = pdir->pre_dirty(); + } + ti = pi.inode; projected = true; } else { - //FIXME: ti = target->get_projected_inode(); + ti = CInode::allocate_inode(*target->get_inode()); projected = false; } + if (ti->ctime == rollback.ctime) ti->ctime = rollback.orig_dest.old_ctime; if (MDS_INO_IS_STRAY(rollback.orig_src.dirfrag.ino)) { @@ -9459,6 +9475,9 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef } else ti->nlink++; + if (!projected) + target->reset_inode(ti); + if 
(rollback.desti_snapbl.length() && target->snaprealm) { bool hadrealm; auto p = rollback.desti_snapbl.cbegin(); @@ -9488,6 +9507,12 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef } } + if (srcdn && srcdn->authority().first == whoami) { + nest_info_t blah; + _rollback_repair_dir(mut, srcdir, rollback.orig_src, rollback.ctime, + in && in->is_dir(), pip ? pip->accounted_rstat : blah); + } + if (srcdn) dout(0) << " srcdn back to " << *srcdn << dendl; if (in) diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index 7aace501831..7e4e2e13e8c 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -146,23 +146,23 @@ void StrayManager::purge(CDentry *dn) class C_PurgeStrayLogged : public StrayManagerLogContext { CDentry *dn; version_t pdv; - LogSegment *ls; + MutationRef mut; public: - C_PurgeStrayLogged(StrayManager *sm_, CDentry *d, version_t v, LogSegment *s) : - StrayManagerLogContext(sm_), dn(d), pdv(v), ls(s) { } + C_PurgeStrayLogged(StrayManager *sm_, CDentry *d, version_t v, MutationRef& m) : + StrayManagerLogContext(sm_), dn(d), pdv(v), mut(m) { } void finish(int r) override { - sm->_purge_stray_logged(dn, pdv, ls); + sm->_purge_stray_logged(dn, pdv, mut); } }; class C_TruncateStrayLogged : public StrayManagerLogContext { CDentry *dn; - LogSegment *ls; + MutationRef mut; public: - C_TruncateStrayLogged(StrayManager *sm, CDentry *d, LogSegment *s) : - StrayManagerLogContext(sm), dn(d), ls(s) { } + C_TruncateStrayLogged(StrayManager *sm, CDentry *d, MutationRef& m) : + StrayManagerLogContext(sm), dn(d), mut(m) {} void finish(int r) override { - sm->_truncate_stray_logged(dn, ls); + sm->_truncate_stray_logged(dn, mut); } }; @@ -178,10 +178,11 @@ void StrayManager::_purge_stray_purged( if (only_head) { /* This was a ::truncate */ - EUpdate *le = new EUpdate(mds->mdlog, "purge_stray truncate"); - mds->mdlog->start_entry(le); + MutationRef mut(new MutationImpl()); + mut->ls = 
mds->mdlog->get_current_segment(); auto pi = in->project_inode(); + mut->add_projected_inode(in); pi.inode->size = 0; pi.inode->max_size_ever = 0; pi.inode->client_ranges.clear(); @@ -189,12 +190,19 @@ void StrayManager::_purge_stray_purged( pi.inode->truncate_from = 0; pi.inode->version = in->pre_dirty(); - le->metablob.add_dir_context(dn->dir); - le->metablob.add_primary_dentry(dn, in, true); + CDir *dir = dn->get_dir(); + auto pf = dir->project_fnode(); + mut->add_projected_fnode(dir); + pf->version = dir->pre_dirty(); - mds->mdlog->submit_entry(le, - new C_TruncateStrayLogged( - this, dn, mds->mdlog->get_current_segment())); + EUpdate *le = new EUpdate(mds->mdlog, "purge_stray truncate"); + mds->mdlog->start_entry(le); + + le->metablob.add_dir_context(dir); + auto& dl = le->metablob.add_dir(dn->dir, true); + le->metablob.add_primary_dentry(dl, dn, in, EMetaBlob::fullbit::STATE_DIRTY); + + mds->mdlog->submit_entry(le, new C_TruncateStrayLogged(this, dn, mut)); } else { if (in->get_num_ref() != (int)in->is_dirty() || dn->get_num_ref() != @@ -208,6 +216,9 @@ void StrayManager::_purge_stray_purged( ceph_abort_msg("rogue reference to purging inode"); } + MutationRef mut(new MutationImpl()); + mut->ls = mds->mdlog->get_current_segment(); + // kill dentry. 
version_t pdv = dn->pre_dirty(); dn->push_projected_linkage(); // NULL @@ -217,7 +228,8 @@ void StrayManager::_purge_stray_purged( // update dirfrag fragstat, rstat CDir *dir = dn->get_dir(); - fnode_t *pf = dir->project_fnode(); + auto pf = dir->project_fnode(); + mut->add_projected_fnode(dir); pf->version = dir->pre_dirty(); if (in->is_dir()) pf->fragstat.nsubdirs--; @@ -226,16 +238,15 @@ void StrayManager::_purge_stray_purged( pf->rstat.sub(in->get_inode()->accounted_rstat); le->metablob.add_dir_context(dn->dir); - EMetaBlob::dirlump& dl = le->metablob.add_dir(dn->dir, true); + auto& dl = le->metablob.add_dir(dn->dir, true); le->metablob.add_null_dentry(dl, dn, true); le->metablob.add_destroyed_inode(in->ino()); - mds->mdlog->submit_entry(le, new C_PurgeStrayLogged(this, dn, pdv, - mds->mdlog->get_current_segment())); + mds->mdlog->submit_entry(le, new C_PurgeStrayLogged(this, dn, pdv, mut)); } } -void StrayManager::_purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *ls) +void StrayManager::_purge_stray_logged(CDentry *dn, version_t pdv, MutationRef& mut) { CInode *in = dn->get_linkage()->get_inode(); CDir *dir = dn->get_dir(); @@ -250,9 +261,9 @@ void StrayManager::_purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *l ceph_assert(dn->get_projected_linkage()->is_null()); dir->unlink_inode(dn, !new_dn); dn->pop_projected_linkage(); - dn->mark_dirty(pdv, ls); + dn->mark_dirty(pdv, mut->ls); - dir->pop_and_dirty_projected_fnode(ls); + mut->apply(); in->state_clear(CInode::STATE_ORPHAN); dn->state_clear(CDentry::STATE_PURGING | CDentry::STATE_PURGINGPINNED); @@ -742,13 +753,13 @@ void StrayManager::truncate(CDentry *dn) this, dn, true)); } -void StrayManager::_truncate_stray_logged(CDentry *dn, LogSegment *ls) +void StrayManager::_truncate_stray_logged(CDentry *dn, MutationRef& mut) { CInode *in = dn->get_projected_linkage()->get_inode(); dout(10) << __func__ << ": " << *dn << " " << *in << dendl; - in->pop_and_dirty_projected_inode(ls); + 
mut->apply(); in->state_clear(CInode::STATE_PURGING); dn->state_clear(CDentry::STATE_PURGING | CDentry::STATE_PURGINGPINNED); diff --git a/src/mds/StrayManager.h b/src/mds/StrayManager.h index 0ad8a53f0ca..86b6941a513 100644 --- a/src/mds/StrayManager.h +++ b/src/mds/StrayManager.h @@ -17,7 +17,8 @@ #include "include/common_fwd.h" #include "include/elist.h" #include -#include "mds/PurgeQueue.h" +#include "Mutation.h" +#include "PurgeQueue.h" class MDSRank; class CInode; @@ -123,13 +124,13 @@ protected: */ void _purge_stray_purged(CDentry *dn, bool only_head); - void _purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *ls); + void _purge_stray_logged(CDentry *dn, version_t pdv, MutationRef& mut); /** * Callback: we have logged the update to an inode's metadata * reflecting it's newly-zeroed length. */ - void _truncate_stray_logged(CDentry *dn, LogSegment *ls); + void _truncate_stray_logged(CDentry *dn, MutationRef &mut); /** * Call this on a dentry that has been identified as * eligible for purging. It will be passed on to PurgeQueue. 
diff --git a/src/mds/events/EFragment.h b/src/mds/events/EFragment.h index 9e86f191598..1cbbf7a8a36 100644 --- a/src/mds/events/EFragment.h +++ b/src/mds/events/EFragment.h @@ -19,7 +19,7 @@ #include "EMetaBlob.h" struct dirfrag_rollback { - fnode_t fnode; + CDir::fnode_const_ptr fnode; dirfrag_rollback() { } void encode(bufferlist& bl) const; void decode(bufferlist::const_iterator& bl); diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index d5917eb9b9b..fd8d2e53796 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -199,7 +199,7 @@ public: static const int STATE_DIRTYDFT = (1<<5); // dirty dirfragtree //version_t dirv; - fnode_t fnode; + CDir::fnode_const_ptr fnode; __u32 state; __u32 nfull, nremote, nnull; @@ -245,7 +245,7 @@ public: } void print(dirfrag_t dirfrag, ostream& out) const { - out << "dirlump " << dirfrag << " v " << fnode.version + out << "dirlump " << dirfrag << " v " << fnode->version << " state " << state << " num " << nfull << "/" << nremote << "/" << nnull << std::endl; @@ -524,31 +524,30 @@ private: } dirlump& add_dir(CDir *dir, bool dirty, bool complete=false) { - return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(), + return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dirty, complete); } dirlump& add_new_dir(CDir *dir) { - return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(), + return add_dir(dir->dirfrag(), dir->get_projected_fnode(), true, true, true); // dirty AND complete AND new } dirlump& add_import_dir(CDir *dir) { // dirty=false would be okay in some cases - return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(), + return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->is_dirty(), dir->is_complete(), false, true, dir->is_dirty_dft()); } dirlump& add_fragmented_dir(CDir *dir, bool dirty, bool dirtydft) { - return add_dir(dir->dirfrag(), dir->get_projected_fnode(), 
dir->get_projected_version(), + return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dirty, false, false, false, dirtydft); } - dirlump& add_dir(dirfrag_t df, const fnode_t *pf, version_t pv, bool dirty, + dirlump& add_dir(dirfrag_t df, const CDir::fnode_const_ptr& pf, bool dirty, bool complete=false, bool isnew=false, bool importing=false, bool dirty_dft=false) { if (lump_map.count(df) == 0) lump_order.push_back(df); dirlump& l = lump_map[df]; - l.fnode = *pf; - l.fnode.version = pv; + l.fnode = pf; if (complete) l.mark_complete(); if (dirty) l.mark_dirty(); if (isnew) l.mark_new(); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 32d7f04367d..7ba28cc5fbe 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -694,7 +694,7 @@ void EMetaBlob::nullbit::generate_test_instances(std::list& ls) void EMetaBlob::dirlump::encode(bufferlist& bl, uint64_t features) const { ENCODE_START(2, 2, bl); - encode(fnode, bl); + encode(*fnode, bl); encode(state, bl); encode(nfull, bl); encode(nremote, bl); @@ -707,7 +707,11 @@ void EMetaBlob::dirlump::encode(bufferlist& bl, uint64_t features) const void EMetaBlob::dirlump::decode(bufferlist::const_iterator &bl) { DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl) - decode(fnode, bl); + { + auto _fnode = CDir::allocate_fnode(); + decode(*_fnode, bl); + fnode = std::move(_fnode); + } decode(state, bl); decode(nfull, bl); decode(nremote, bl); @@ -724,7 +728,7 @@ void EMetaBlob::dirlump::dump(Formatter *f) const me->_decode_bits(); } f->open_object_section("fnode"); - fnode.dump(f); + fnode->dump(f); f->close_section(); // fnode f->dump_string("state", state_string()); f->dump_int("nfull", nfull); @@ -756,7 +760,9 @@ void EMetaBlob::dirlump::dump(Formatter *f) const void EMetaBlob::dirlump::generate_test_instances(std::list& ls) { - ls.push_back(new dirlump()); + auto dl = new dirlump(); + dl->fnode = CDir::allocate_fnode(); + ls.push_back(dl); } /** @@ -1168,8 +1174,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment 
*logseg, MDPeerUpdate *peerup) dout(10) << "EMetaBlob.replay added dir " << *dir << dendl; } - dir->set_version( lump.fnode.version ); - dir->fnode = lump.fnode; + dir->reset_fnode(std::move(lump.fnode)); + dir->update_projected_version(); if (lump.is_importing()) { dir->state_set(CDir::STATE_AUTH); @@ -1178,14 +1184,14 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) if (lump.is_dirty()) { dir->_mark_dirty(logseg); - if (!(dir->fnode.rstat == dir->fnode.accounted_rstat)) { + if (!(dir->get_fnode()->rstat == dir->get_fnode()->accounted_rstat)) { dout(10) << "EMetaBlob.replay dirty nestinfo on " << *dir << dendl; mds->locker->mark_updated_scatterlock(&dir->inode->nestlock); logseg->dirty_dirfrag_nest.push_back(&dir->inode->item_dirty_dirfrag_nest); } else { dout(10) << "EMetaBlob.replay clean nestinfo on " << *dir << dendl; } - if (!(dir->fnode.fragstat == dir->fnode.accounted_fragstat)) { + if (!(dir->get_fnode()->fragstat == dir->get_fnode()->accounted_fragstat)) { dout(10) << "EMetaBlob.replay dirty fragstat on " << *dir << dendl; mds->locker->mark_updated_scatterlock(&dir->inode->filelock); logseg->dirty_dirfrag_dir.push_back(&dir->inode->item_dirty_dirfrag_dir); @@ -2868,14 +2874,18 @@ void EFragment::generate_test_instances(std::list& ls) void dirfrag_rollback::encode(bufferlist &bl) const { ENCODE_START(1, 1, bl); - encode(fnode, bl); + encode(*fnode, bl); ENCODE_FINISH(bl); } void dirfrag_rollback::decode(bufferlist::const_iterator &bl) { DECODE_START(1, bl); - decode(fnode, bl); + { + auto _fnode = CDir::allocate_fnode(); + decode(*_fnode, bl); + fnode = std::move(_fnode); + } DECODE_FINISH(bl); } diff --git a/src/tools/cephfs/JournalTool.cc b/src/tools/cephfs/JournalTool.cc index 0c9683de608..b20a6b29608 100644 --- a/src/tools/cephfs/JournalTool.cc +++ b/src/tools/cephfs/JournalTool.cc @@ -730,9 +730,9 @@ int JournalTool::recover_dentries( try { old_fnode.decode(old_fnode_iter); dout(4) << "frag " << frag_oid.name << " 
fnode old v" << - old_fnode.version << " vs new v" << lump.fnode.version << dendl; + old_fnode.version << " vs new v" << lump.fnode->version << dendl; old_fnode_version = old_fnode.version; - write_fnode = old_fnode_version < lump.fnode.version; + write_fnode = old_fnode_version < lump.fnode->version; } catch (const buffer::error &err) { dout(1) << "frag " << frag_oid.name << " is corrupt, overwriting" << dendl; @@ -748,7 +748,7 @@ int JournalTool::recover_dentries( if ((other_pool || write_fnode) && !dry_run) { dout(4) << "writing fnode to omap header" << dendl; bufferlist fnode_bl; - lump.fnode.encode(fnode_bl); + lump.fnode->encode(fnode_bl); if (!other_pool || frag.ino >= MDS_INO_SYSTEM_BASE) { r = output.omap_set_header(frag_oid.name, fnode_bl); } @@ -830,9 +830,9 @@ int JournalTool::recover_dentries( // squash over it with what's in this fullbit dout(10) << "Existing remote inode in slot to be (maybe) written " << "by a full inode from the journal dn '" << fb.dn.c_str() - << "' with lump fnode version " << lump.fnode.version + << "' with lump fnode version " << lump.fnode->version << "vs existing fnode version " << old_fnode_version << dendl; - write_dentry = old_fnode_version < lump.fnode.version; + write_dentry = old_fnode_version < lump.fnode->version; } else if (dentry_type == 'I') { // Read out inode version to compare with backing store InodeStore inode; @@ -893,15 +893,15 @@ int JournalTool::recover_dentries( if (dentry_type == 'L') { dout(10) << "Existing hardlink inode in slot to be (maybe) written " << "by a remote inode from the journal dn '" << rb.dn.c_str() - << "' with lump fnode version " << lump.fnode.version + << "' with lump fnode version " << lump.fnode->version << "vs existing fnode version " << old_fnode_version << dendl; - write_dentry = old_fnode_version < lump.fnode.version; + write_dentry = old_fnode_version < lump.fnode->version; } else if (dentry_type == 'I') { dout(10) << "Existing full inode in slot to be (maybe) written " << "by 
a remote inode from the journal dn '" << rb.dn.c_str() - << "' with lump fnode version " << lump.fnode.version + << "' with lump fnode version " << lump.fnode->version << "vs existing fnode version " << old_fnode_version << dendl; - write_dentry = old_fnode_version < lump.fnode.version; + write_dentry = old_fnode_version < lump.fnode->version; } else { dout(4) << "corrupt dentry in backing store, overwriting from " "journal" << dendl; @@ -949,15 +949,15 @@ int JournalTool::recover_dentries( if (dentry_type == 'L') { dout(10) << "Existing hardlink inode in slot to be (maybe) removed " << "by null journal dn '" << nb.dn.c_str() - << "' with lump fnode version " << lump.fnode.version + << "' with lump fnode version " << lump.fnode->version << "vs existing fnode version " << old_fnode_version << dendl; - remove_dentry = old_fnode_version < lump.fnode.version; + remove_dentry = old_fnode_version < lump.fnode->version; } else if (dentry_type == 'I') { dout(10) << "Existing full inode in slot to be (maybe) removed " << "by null journal dn '" << nb.dn.c_str() - << "' with lump fnode version " << lump.fnode.version + << "' with lump fnode version " << lump.fnode->version << "vs existing fnode version " << old_fnode_version << dendl; - remove_dentry = old_fnode_version < lump.fnode.version; + remove_dentry = old_fnode_version < lump.fnode->version; } else { dout(4) << "corrupt dentry in backing store, will remove" << dendl; remove_dentry = true;