From: Yan, Zheng Date: Thu, 16 Jul 2020 03:19:10 +0000 (+0800) Subject: mds: use smart pointer to manage CInode::{inode,xattrs,old_inodes} X-Git-Tag: v17.0.0~1622^2~5 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=990c20ab79998815b6937a9ed35c87669e86e8fb;p=ceph.git mds: use smart pointer to manage CInode::{inode,xattrs,old_inodes} this avoids copying the whole inode_t and xattr map when journaling inodes. Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index da91b1284ca96..34bc91359cc57 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -414,7 +414,7 @@ void CDentry::encode_lock_state(int type, bufferlist& bl) if (linkage.is_primary()) { c = 1; encode(c, bl); - encode(linkage.get_inode()->inode.ino, bl); + encode(linkage.get_inode()->ino(), bl); } else if (linkage.is_remote()) { c = 2; diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index c052a4a16b95c..9916abc414543 100755 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -237,7 +237,7 @@ bool CDir::check_rstats(bool scrub) CDentry::linkage_t *dnl = i->second->get_linkage(); if (dnl->is_primary()) { CInode *in = dnl->get_inode(); - nest_info.add(in->inode.accounted_rstat); + nest_info.add(in->get_inode()->accounted_rstat); if (in->is_dir()) frag_info.nsubdirs++; else @@ -277,7 +277,7 @@ bool CDir::check_rstats(bool scrub) CDentry *dn = i->second; if (dn->get_linkage()->is_primary()) { CInode *in = dn->get_linkage()->inode; - dout(1) << *dn << " rstat " << in->inode.accounted_rstat << dendl; + dout(1) << *dn << " rstat " << in->get_inode()->accounted_rstat << dendl; } else { dout(1) << *dn << dendl; } @@ -860,7 +860,7 @@ void CDir::steal_dentry(CDentry *dn) if (dn->get_linkage()->is_primary()) { CInode *in = dn->get_linkage()->get_inode(); - auto pi = in->get_projected_inode(); + const auto& pi = in->get_projected_inode(); if (in->is_dir()) { fnode.fragstat.nsubdirs++; if (in->item_pop_lru.is_on_list()) @@ -1182,7 +1182,7 @@ void CDir::merge(const 
std::vector& subs, MDSContext::vec& waiters, bool void CDir::resync_accounted_fragstat() { fnode_t *pf = get_projected_fnode(); - auto pi = inode->get_projected_inode(); + const auto& pi = inode->get_projected_inode(); if (pf->accounted_fragstat.version != pi->dirstat.version) { pf->fragstat.version = pi->dirstat.version; @@ -1197,7 +1197,7 @@ void CDir::resync_accounted_fragstat() void CDir::resync_accounted_rstat() { fnode_t *pf = get_projected_fnode(); - auto pi = inode->get_projected_inode(); + const auto& pi = inode->get_projected_inode(); if (pf->accounted_rstat.version != pi->rstat.version) { pf->rstat.version = pi->rstat.version; @@ -1217,8 +1217,8 @@ void CDir::assimilate_dirty_rstat_inodes() if (in->is_frozen()) continue; - auto &pi = in->project_inode(); - pi.inode.version = in->pre_dirty(); + auto pi = in->project_inode(); + pi.inode->version = in->pre_dirty(); inode->mdcache->project_rstat_inode_to_frag(in, this, 0, 0, NULL); } @@ -1799,8 +1799,8 @@ CDentry *CDir::_load_dentry( undef_inode = true; } else if (committed_version == 0 && dn->is_dirty() && - inode_data.inode.ino == in->ino() && - inode_data.inode.version == in->get_version()) { + inode_data.inode->ino == in->ino() && + inode_data.inode->version == in->get_version()) { /* clean underwater item? * Underwater item is something that is dirty in our cache from * journal replay, but was previously flushed to disk before the @@ -1823,23 +1823,23 @@ CDentry *CDir::_load_dentry( if (!dn || undef_inode) { // add inode - CInode *in = cache->get_inode(inode_data.inode.ino, last); + CInode *in = cache->get_inode(inode_data.inode->ino, last); if (!in || undef_inode) { if (undef_inode && in) in->first = first; else in = new CInode(cache, true, first, last); - in->inode = inode_data.inode; + in->reset_inode(std::move(inode_data.inode)); + in->reset_xattrs(std::move(inode_data.xattrs)); // symlink? 
if (in->is_symlink()) in->symlink = inode_data.symlink; in->dirfragtree.swap(inode_data.dirfragtree); - in->xattrs.swap(inode_data.xattrs); - in->old_inodes.swap(inode_data.old_inodes); - if (!in->old_inodes.empty()) { - snapid_t min_first = in->old_inodes.rbegin()->first + 1; + in->reset_old_inodes(std::move(inode_data.old_inodes)); + if (in->is_any_old_inodes()) { + snapid_t min_first = in->get_old_inodes()->rbegin()->first + 1; if (min_first > in->first) in->first = min_first; } @@ -1855,7 +1855,7 @@ CDentry *CDir::_load_dentry( } dout(12) << "_fetched got " << *dn << " " << *in << dendl; - if (in->inode.is_dirty_rstat()) + if (in->get_inode()->is_dirty_rstat()) in->mark_dirty_rstat(); in->maybe_ephemeral_rand(true, rand_threshold); @@ -1867,15 +1867,15 @@ CDentry *CDir::_load_dentry( dn = add_primary_dentry(dname, in, first, last); } else { dout(0) << "_fetched badness: got (but i already had) " << *in - << " mode " << in->inode.mode - << " mtime " << in->inode.mtime << dendl; + << " mode " << in->get_inode()->mode + << " mtime " << in->get_inode()->mtime << dendl; string dirpath, inopath; this->inode->make_path_string(dirpath); in->make_path_string(inopath); - cache->mds->clog->error() << "loaded dup inode " << inode_data.inode.ino - << " [" << first << "," << last << "] v" << inode_data.inode.version + cache->mds->clog->error() << "loaded dup inode " << inode_data.inode->ino + << " [" << first << "," << last << "] v" << inode_data.inode->version << " at " << dirpath << "/" << dname - << ", but inode " << in->vino() << " v" << in->inode.version + << ", but inode " << in->vino() << " v" << in->get_version() << " already exists at " << inopath; return dn; } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 292321dc0fc3a..72d720238116d 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -49,7 +49,7 @@ #define dout_context g_ceph_context #define dout_subsys ceph_subsys_mds #undef dout_prefix -#define dout_prefix *_dout << "mds." 
<< mdcache->mds->get_nodeid() << ".cache.ino(" << inode.ino << ") " +#define dout_prefix *_dout << "mds." << mdcache->mds->get_nodeid() << ".cache.ino(" << ino() << ") " class CInodeIOContext : public MDSIOContextBase @@ -108,7 +108,7 @@ std::string_view CInode::pin_name(int p) const //int cinode_pins[CINODE_NUM_PINS]; // counts ostream& CInode::print_db_line_prefix(ostream& out) { - return out << ceph_clock_now() << " mds." << mdcache->mds->get_nodeid() << ".cache.ino(" << inode.ino << ") "; + return out << ceph_clock_now() << " mds." << mdcache->mds->get_nodeid() << ".cache.ino(" << ino() << ") "; } /* @@ -127,7 +127,7 @@ ostream& operator<<(ostream& out, const CInode& in) string path; in.make_path_string(path, true); - out << "[inode " << in.inode.ino; + out << "[inode " << in.ino(); out << " [" << (in.is_multiversion() ? "...":"") << in.first << "," << in.last << "]"; @@ -173,43 +173,44 @@ ostream& operator<<(ostream& out, const CInode& in) if (in.is_frozen_inode()) out << " FROZEN"; if (in.is_frozen_auth_pin()) out << " FROZEN_AUTHPIN"; - const CInode::mempool_inode *pi = in.get_projected_inode(); + const auto& pi = in.get_projected_inode(); if (pi->is_truncating()) out << " truncating(" << pi->truncate_from << " to " << pi->truncate_size << ")"; - if (in.inode.is_dir()) { - out << " " << in.inode.dirstat; + if (in.is_dir()) { + out << " " << in.get_inode()->dirstat; if (g_conf()->mds_debug_scatterstat && in.is_projected()) { - const CInode::mempool_inode *pi = in.get_projected_inode(); out << "->" << pi->dirstat; } } else { - out << " s=" << in.inode.size; - if (in.inode.nlink != 1) - out << " nl=" << in.inode.nlink; + out << " s=" << in.get_inode()->size; + if (in.get_inode()->nlink != 1) + out << " nl=" << in.get_inode()->nlink; } // rstat - out << " " << in.inode.rstat; - if (!(in.inode.rstat == in.inode.accounted_rstat)) - out << "/" << in.inode.accounted_rstat; + out << " " << in.get_inode()->rstat; + if (!(in.get_inode()->rstat == 
in.get_inode()->accounted_rstat)) + out << "/" << in.get_inode()->accounted_rstat; if (g_conf()->mds_debug_scatterstat && in.is_projected()) { - const CInode::mempool_inode *pi = in.get_projected_inode(); out << "->" << pi->rstat; if (!(pi->rstat == pi->accounted_rstat)) out << "/" << pi->accounted_rstat; } + if (in.is_any_old_inodes()) { + out << " old_inodes=" << in.get_old_inodes()->size(); + } + if (!in.client_need_snapflush.empty()) out << " need_snapflush=" << in.client_need_snapflush; - // locks if (!in.authlock.is_sync_and_unlocked()) out << " " << in.authlock; if (!in.linklock.is_sync_and_unlocked()) out << " " << in.linklock; - if (in.inode.is_dir()) { + if (in.get_inode()->is_dir()) { if (!in.dirfragtreelock.is_sync_and_unlocked()) out << " " << in.dirfragtreelock; if (!in.snaplock.is_sync_and_unlocked()) @@ -230,8 +231,8 @@ ostream& operator<<(ostream& out, const CInode& in) out << " " << in.versionlock; // hack: spit out crap on which clients have caps - if (in.inode.client_ranges.size()) - out << " cr=" << in.inode.client_ranges; + if (in.get_inode()->client_ranges.size()) + out << " cr=" << in.get_inode()->client_ranges; if (!in.get_client_caps().empty()) { out << " caps={"; @@ -270,8 +271,8 @@ ostream& operator<<(ostream& out, const CInode& in) in.print_pin_set(out); } - if (in.inode.export_pin != MDS_RANK_NONE) { - out << " export_pin=" << in.inode.export_pin; + if (in.get_inode()->export_pin != MDS_RANK_NONE) { + out << " export_pin=" << in.get_inode()->export_pin; } if (in.state_test(CInode::STATE_DISTEPHEMERALPIN)) { out << " distepin"; @@ -294,11 +295,8 @@ ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si) return out; } -CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l) - : - mdcache(c), - first(f), last(l), - item_dirty(this), +CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l) : + mdcache(c), first(f), last(l), item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this), @@ 
-317,7 +315,8 @@ CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l) flocklock(this, &flocklock_type), policylock(this, &policylock_type) { - if (auth) state_set(STATE_AUTH); + if (auth) + state_set(STATE_AUTH); } void CInode::print(ostream& out) @@ -334,7 +333,7 @@ void CInode::add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client // FIXME: this is non-optimal, as we'll block freezes/migrations for potentially // long periods waiting for clients to flush their snaps. - auth_pin(this); // pin head inode... + auth_pin(this); // pin head inode... } auto &clients = client_need_snapflush[snapid]; @@ -420,88 +419,67 @@ void CInode::clear_dirty_rstat() } } -CInode::projected_inode &CInode::project_inode(bool xattr, bool snap) +CInode::projected_inode CInode::project_inode(bool xattr, bool snap) { - auto &pi = projected_nodes.empty() ? - projected_nodes.emplace_back(inode) : - projected_nodes.emplace_back(projected_nodes.back().inode); + auto pi = allocate_inode(*get_projected_inode()); if (scrub_infop && scrub_infop->last_scrub_dirty) { - pi.inode.last_scrub_stamp = scrub_infop->last_scrub_stamp; - pi.inode.last_scrub_version = scrub_infop->last_scrub_version; + pi->last_scrub_stamp = scrub_infop->last_scrub_stamp; + pi->last_scrub_version = scrub_infop->last_scrub_version; scrub_infop->last_scrub_dirty = false; scrub_maybe_delete_info(); } + const auto& ox = get_projected_xattrs(); + xattr_map_ptr px; if (xattr) { - pi.xattrs.reset(new mempool_xattr_map(*get_projected_xattrs())); - ++num_projected_xattrs; + px = allocate_xattr_map(); + if (ox) + *px = *ox; } + sr_t* ps = projected_inode::UNDEF_SRNODE; if (snap) { - project_snaprealm(); + ps = prepare_new_srnode(0); + ++num_projected_srnodes; } - dout(15) << __func__ << " " << pi.inode.ino << dendl; - return pi; + projected_nodes.emplace_back(pi, xattr ? 
px : ox , ps); + + dout(15) << __func__ << " " << pi->ino << dendl; + return projected_inode(std::move(pi), std::move(px), ps); } void CInode::pop_and_dirty_projected_inode(LogSegment *ls) { ceph_assert(!projected_nodes.empty()); - auto& front = projected_nodes.front(); + auto front = std::move(projected_nodes.front()); + dout(15) << __func__ << " v" << front.inode->version << dendl; - dout(15) << __func__ << " " << front.inode.ino - << " v" << front.inode.version << dendl; + projected_nodes.pop_front(); + + bool pool_update = get_inode()->layout.pool_id != front.inode->layout.pool_id; + bool pin_update = get_inode()->export_pin != front.inode->export_pin; + bool dist_update = get_inode()->export_ephemeral_distributed_pin != + front.inode->export_ephemeral_distributed_pin; - int64_t old_pool = inode.layout.pool_id; - bool pin_update = inode.export_pin != front.inode.export_pin; - bool dist_update = inode.export_ephemeral_distributed_pin - != front.inode.export_ephemeral_distributed_pin; + reset_inode(std::move(front.inode)); + if (front.xattrs != get_xattrs()) + reset_xattrs(std::move(front.xattrs)); - mark_dirty(front.inode.version, ls); + if (front.snapnode != projected_inode::UNDEF_SRNODE) { + --num_projected_srnodes; + pop_projected_snaprealm(front.snapnode, false); + } - inode = std::move(front.inode); + mark_dirty(ls); + if (get_inode()->is_backtrace_updated()) + mark_dirty_parent(ls, pool_update); if (pin_update) maybe_export_pin(true); if (dist_update) maybe_ephemeral_dist_children(true); - - if (inode.is_backtrace_updated()) - mark_dirty_parent(ls, old_pool != inode.layout.pool_id); - - if (front.xattrs) { - --num_projected_xattrs; - xattrs = *front.xattrs; - } - - if (projected_nodes.front().snapnode != projected_inode::UNDEF_SRNODE) { - pop_projected_snaprealm(projected_nodes.front().snapnode, false); - --num_projected_srnodes; - } - - projected_nodes.pop_front(); -} - -CInode::mempool_xattr_map *CInode::get_projected_xattrs() -{ - if 
(num_projected_xattrs > 0) { - for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) - if (it->xattrs) - return it->xattrs.get(); - } - return &xattrs; -} - -CInode::mempool_xattr_map *CInode::get_previous_projected_xattrs() -{ - if (num_projected_xattrs > 0) { - for (auto it = ++projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) - if (it->xattrs) - return it->xattrs.get(); - } - return &xattrs; } sr_t *CInode::prepare_new_srnode(snapid_t snapid) @@ -693,9 +671,11 @@ void CInode::pop_projected_snaprealm(sr_t *next_snaprealm, bool early) // dirfrags +InodeStoreBase::inode_const_ptr InodeStoreBase::empty_inode = InodeStoreBase::allocate_inode(); + __u32 InodeStoreBase::hash_dentry_name(std::string_view dn) { - int which = inode.dir_layout.dl_dir_hash; + int which = inode->dir_layout.dl_dir_hash; if (!which) which = CEPH_STR_HASH_LINUX; ceph_assert(ceph_str_hash_valid(which)); @@ -1060,7 +1040,7 @@ void CInode::make_path(filepath& fp, bool projected) const void CInode::name_stray_dentry(string& dname) { char s[20]; - snprintf(s, sizeof(s), "%llx", (unsigned long long)inode.ino.val); + snprintf(s, sizeof(s), "%llx", (unsigned long long)ino().val); dname = s; } @@ -1070,16 +1050,16 @@ version_t CInode::pre_dirty() CDentry* _cdentry = get_projected_parent_dn(); if (_cdentry) { pv = _cdentry->pre_dirty(get_projected_version()); - dout(10) << "pre_dirty " << pv << " (current v " << inode.version << ")" << dendl; + dout(10) << "pre_dirty " << pv << " (current v " << get_inode()->version << ")" << dendl; } else { ceph_assert(is_base()); pv = get_projected_version() + 1; } // force update backtrace for old format inode (see mempool_inode::decode) - if (inode.backtrace_version == 0 && !projected_nodes.empty()) { - mempool_inode &pi = projected_nodes.back().inode; - if (pi.backtrace_version == 0) - pi.update_backtrace(pv); + if (get_inode()->backtrace_version == 0 && !projected_nodes.empty()) { + auto pi = _get_projected_inode(); + if 
(pi->backtrace_version == 0) + pi->update_backtrace(pv); } return pv; } @@ -1097,7 +1077,7 @@ void CInode::_mark_dirty(LogSegment *ls) ls->dirty_inodes.push_back(&item_dirty); } -void CInode::mark_dirty(version_t pv, LogSegment *ls) { +void CInode::mark_dirty(LogSegment *ls) { dout(10) << __func__ << " " << *this << dendl; @@ -1112,13 +1092,11 @@ void CInode::mark_dirty(version_t pv, LogSegment *ls) { ceph_assert(is_auth()); // touch my private version - ceph_assert(inode.version < pv); - inode.version = pv; _mark_dirty(ls); // mark dentry too if (parent) - parent->mark_dirty(pv, ls); + parent->mark_dirty(get_version(), ls); } @@ -1309,7 +1287,7 @@ void CInode::_fetched(bufferlist& bl, bufferlist& bl2, Context *fin) void CInode::build_backtrace(int64_t pool, inode_backtrace_t& bt) { - bt.ino = inode.ino; + bt.ino = ino(); bt.ancestors.clear(); bt.pool = pool; @@ -1317,11 +1295,11 @@ void CInode::build_backtrace(int64_t pool, inode_backtrace_t& bt) CDentry *pdn = get_parent_dn(); while (pdn) { CInode *diri = pdn->get_dir()->get_inode(); - bt.ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->get_name(), in->inode.version)); + bt.ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->get_name(), in->get_inode()->version)); in = diri; pdn = in->get_parent_dn(); } - for (auto &p : inode.old_pools) { + for (auto &p : get_inode()->old_pools) { // don't add our own pool id to old_pools to avoid looping (e.g. 
setlayout 0, 1, 0) if (p != pool) bt.old_pools.insert(p); @@ -1363,17 +1341,17 @@ void CInode::store_backtrace(MDSContext *fin, int op_prio) op.setxattr("parent", parent_bl); bufferlist layout_bl; - encode(inode.layout, layout_bl, mdcache->mds->mdsmap->get_up_features()); + encode(get_inode()->layout, layout_bl, mdcache->mds->mdsmap->get_up_features()); op.setxattr("layout", layout_bl); SnapContext snapc; object_t oid = get_object_name(ino(), frag_t(), ""); object_locator_t oloc(pool); Context *fin2 = new C_OnFinisher( - new C_IO_Inode_StoredBacktrace(this, inode.backtrace_version, fin), + new C_IO_Inode_StoredBacktrace(this, get_inode()->backtrace_version, fin), mdcache->mds->finisher); - if (!state_test(STATE_DIRTYPOOL) || inode.old_pools.empty()) { + if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty()) { dout(20) << __func__ << ": no dirtypool or no old pools" << dendl; mdcache->mds->objecter->mutate(oid, oloc, op, snapc, ceph::real_clock::now(), @@ -1389,7 +1367,7 @@ void CInode::store_backtrace(MDSContext *fin, int op_prio) // In the case where DIRTYPOOL is set, we update all old pools backtraces // such that anyone reading them will see the new pool ID in // inode_backtrace_t::pool and go read everything else from there. 
- for (const auto &p : inode.old_pools) { + for (const auto &p : get_inode()->old_pools) { if (p == pool) continue; @@ -1442,7 +1420,7 @@ void CInode::_stored_backtrace(int r, version_t v, Context *fin) dout(10) << __func__ << " v " << v << dendl; auth_unpin(this); - if (v == inode.backtrace_version) + if (v == get_inode()->backtrace_version) clear_dirty_parent(); if (fin) fin->complete(0); @@ -1450,7 +1428,7 @@ void CInode::_stored_backtrace(int r, version_t v, Context *fin) void CInode::fetch_backtrace(Context *fin, bufferlist *backtrace) { - mdcache->fetch_backtrace(inode.ino, get_backtrace_pool(), *backtrace, fin); + mdcache->fetch_backtrace(ino(), get_backtrace_pool(), *backtrace, fin); } void CInode::mark_dirty_parent(LogSegment *ls, bool dirty_pool) @@ -1510,20 +1488,59 @@ void CInode::verify_diri_backtrace(bufferlist &bl, int err) // parent dir +void InodeStoreBase::encode_xattrs(bufferlist &bl) const { + using ceph::encode; + if (xattrs) + encode(*xattrs, bl); + else + encode((__u32)0, bl); +} + +void InodeStoreBase::decode_xattrs(bufferlist::const_iterator &p) { + using ceph::decode; + mempool_xattr_map tmp; + decode_noshare(tmp, p); + if (tmp.empty()) { + reset_xattrs(xattr_map_ptr()); + } else { + reset_xattrs(allocate_xattr_map(std::move(tmp))); + } +} + +void InodeStoreBase::encode_old_inodes(bufferlist &bl, uint64_t features) const { + using ceph::encode; + if (old_inodes) + encode(*old_inodes, bl, features); + else + encode((__u32)0, bl); +} + +void InodeStoreBase::decode_old_inodes(bufferlist::const_iterator &p) { + using ceph::decode; + mempool_old_inode_map tmp; + decode(tmp, p); + if (tmp.empty()) { + reset_old_inodes(old_inode_map_ptr()); + } else { + reset_old_inodes(allocate_old_inode_map(std::move(tmp))); + } +} + void InodeStoreBase::encode_bare(bufferlist &bl, uint64_t features, const bufferlist *snap_blob) const { using ceph::encode; - encode(inode, bl, features); - if (is_symlink()) + encode(*inode, bl, features); + if 
(inode->is_symlink()) encode(symlink, bl); encode(dirfragtree, bl); - encode(xattrs, bl); + encode_xattrs(bl); + if (snap_blob) encode(*snap_blob, bl); else encode(bufferlist(), bl); - encode(old_inodes, bl, features); + encode_old_inodes(bl, features); encode(oldest_snap, bl); encode(damage_flags, bl); } @@ -1548,23 +1565,26 @@ void InodeStoreBase::decode_bare(bufferlist::const_iterator &bl, bufferlist& snap_blob, __u8 struct_v) { using ceph::decode; - decode(inode, bl); - if (is_symlink()) { + + auto _inode = allocate_inode(); + decode(*_inode, bl); + + if (_inode->is_symlink()) { std::string tmp; decode(tmp, bl); symlink = std::string_view(tmp); } decode(dirfragtree, bl); - decode_noshare(xattrs, bl); + decode_xattrs(bl); decode(snap_blob, bl); - decode(old_inodes, bl); - if (struct_v == 2 && inode.is_dir()) { + decode_old_inodes(bl); + if (struct_v == 2 && _inode->is_dir()) { bool default_layout_exists; decode(default_layout_exists, bl); if (default_layout_exists) { decode(struct_v, bl); // this was a default_file_layout - decode(inode.layout, bl); // but we only care about the layout portion + decode(_inode->layout, bl); // but we only care about the layout portion } } @@ -1579,6 +1599,8 @@ void InodeStoreBase::decode_bare(bufferlist::const_iterator &bl, decode(damage_flags, bl); } } + + reset_inode(std::move(_inode)); } @@ -1625,52 +1647,58 @@ void CInode::set_object_info(MDSCacheObjectInfo &info) void CInode::encode_lock_iauth(bufferlist& bl) { ENCODE_START(1, 1, bl); - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.mode, bl); - encode(inode.uid, bl); - encode(inode.gid, bl); + encode(get_inode()->version, bl); + encode(get_inode()->ctime, bl); + encode(get_inode()->mode, bl); + encode(get_inode()->uid, bl); + encode(get_inode()->gid, bl); ENCODE_FINISH(bl); } void CInode::decode_lock_iauth(bufferlist::const_iterator& p) { + ceph_assert(!is_auth()); + auto _inode = allocate_inode(*get_inode()); DECODE_START(1, p); - 
decode(inode.version, p); + decode(_inode->version, p); utime_t tm; decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.mode, p); - decode(inode.uid, p); - decode(inode.gid, p); + if (_inode->ctime < tm) _inode->ctime = tm; + decode(_inode->mode, p); + decode(_inode->uid, p); + decode(_inode->gid, p); DECODE_FINISH(p); + reset_inode(std::move(_inode)); } void CInode::encode_lock_ilink(bufferlist& bl) { ENCODE_START(1, 1, bl); - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.nlink, bl); + encode(get_inode()->version, bl); + encode(get_inode()->ctime, bl); + encode(get_inode()->nlink, bl); ENCODE_FINISH(bl); } void CInode::decode_lock_ilink(bufferlist::const_iterator& p) { + ceph_assert(!is_auth()); + auto _inode = allocate_inode(*get_inode()); DECODE_START(1, p); - decode(inode.version, p); + decode(_inode->version, p); utime_t tm; decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.nlink, p); + if (_inode->ctime < tm) _inode->ctime = tm; + decode(_inode->nlink, p); DECODE_FINISH(p); + reset_inode(std::move(_inode)); } void CInode::encode_lock_idft(bufferlist& bl) { ENCODE_START(1, 1, bl); if (is_auth()) { - encode(inode.version, bl); + encode(get_inode()->version, bl); } else { // treat flushing as dirty when rejoining cache bool dirty = dirfragtreelock.is_dirty_or_flushing(); @@ -1696,6 +1724,8 @@ void CInode::encode_lock_idft(bufferlist& bl) void CInode::decode_lock_idft(bufferlist::const_iterator& p) { + inode_ptr _inode; + DECODE_START(1, p); if (is_auth()) { bool replica_dirty; @@ -1705,7 +1735,8 @@ void CInode::decode_lock_idft(bufferlist::const_iterator& p) dirfragtreelock.mark_dirty(); // ok bc we're auth and caller will handle } } else { - decode(inode.version, p); + _inode = allocate_inode(*get_inode()); + decode(_inode->version, p); } { fragtree_t temp; @@ -1740,32 +1771,35 @@ void CInode::decode_lock_idft(bufferlist::const_iterator& p) verify_dirfrags(); } DECODE_FINISH(p); + + if (_inode) + 
reset_inode(std::move(_inode)); } void CInode::encode_lock_ifile(bufferlist& bl) { ENCODE_START(1, 1, bl); if (is_auth()) { - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.mtime, bl); - encode(inode.atime, bl); - encode(inode.time_warp_seq, bl); + encode(get_inode()->version, bl); + encode(get_inode()->ctime, bl); + encode(get_inode()->mtime, bl); + encode(get_inode()->atime, bl); + encode(get_inode()->time_warp_seq, bl); if (!is_dir()) { - encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features()); - encode(inode.size, bl); - encode(inode.truncate_seq, bl); - encode(inode.truncate_size, bl); - encode(inode.client_ranges, bl); - encode(inode.inline_data, bl); + encode(get_inode()->layout, bl, mdcache->mds->mdsmap->get_up_features()); + encode(get_inode()->size, bl); + encode(get_inode()->truncate_seq, bl); + encode(get_inode()->truncate_size, bl); + encode(get_inode()->client_ranges, bl); + encode(get_inode()->inline_data, bl); } } else { // treat flushing as dirty when rejoining cache bool dirty = filelock.is_dirty_or_flushing(); encode(dirty, bl); } - dout(15) << __func__ << " inode.dirstat is " << inode.dirstat << dendl; - encode(inode.dirstat, bl); // only meaningful if i am auth. + dout(15) << __func__ << " inode.dirstat is " << get_inode()->dirstat << dendl; + encode(get_inode()->dirstat, bl); // only meaningful if i am auth. 
bufferlist tmp; __u32 n = 0; for (const auto &p : dirfrags) { @@ -1790,22 +1824,26 @@ void CInode::encode_lock_ifile(bufferlist& bl) void CInode::decode_lock_ifile(bufferlist::const_iterator& p) { + inode_ptr _inode; + DECODE_START(1, p); if (!is_auth()) { - decode(inode.version, p); + _inode = allocate_inode(*get_inode()); + + decode(_inode->version, p); utime_t tm; decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.mtime, p); - decode(inode.atime, p); - decode(inode.time_warp_seq, p); + if (_inode->ctime < tm) _inode->ctime = tm; + decode(_inode->mtime, p); + decode(_inode->atime, p); + decode(_inode->time_warp_seq, p); if (!is_dir()) { - decode(inode.layout, p); - decode(inode.size, p); - decode(inode.truncate_seq, p); - decode(inode.truncate_size, p); - decode(inode.client_ranges, p); - decode(inode.inline_data, p); + decode(_inode->layout, p); + decode(_inode->size, p); + decode(_inode->truncate_seq, p); + decode(_inode->truncate_size, p); + decode(_inode->client_ranges, p); + decode(_inode->inline_data, p); } } else { bool replica_dirty; @@ -1820,7 +1858,7 @@ void CInode::decode_lock_ifile(bufferlist::const_iterator& p) decode(dirstat, p); if (!is_auth()) { dout(10) << " taking inode dirstat " << dirstat << " for " << *this << dendl; - inode.dirstat = dirstat; // take inode summation if replica + _inode->dirstat = dirstat; // take inode summation if replica } __u32 n; decode(n, p); @@ -1857,25 +1895,28 @@ void CInode::decode_lock_ifile(bufferlist::const_iterator& p) dir->first = fgfirst; fnode_t *pf = dir->get_projected_fnode(); finish_scatter_update(&filelock, dir, - inode.dirstat.version, pf->accounted_fragstat.version); + _inode->dirstat.version, pf->accounted_fragstat.version); } } } DECODE_FINISH(p); + + if (_inode) + reset_inode(std::move(_inode)); } void CInode::encode_lock_inest(bufferlist& bl) { ENCODE_START(1, 1, bl); if (is_auth()) { - encode(inode.version, bl); + encode(get_inode()->version, bl); } else { // treat flushing as 
dirty when rejoining cache bool dirty = nestlock.is_dirty_or_flushing(); encode(dirty, bl); } - dout(15) << __func__ << " inode.rstat is " << inode.rstat << dendl; - encode(inode.rstat, bl); // only meaningful if i am auth. + dout(15) << __func__ << " inode.rstat is " << get_inode()->rstat << dendl; + encode(get_inode()->rstat, bl); // only meaningful if i am auth. bufferlist tmp; __u32 n = 0; for (const auto &p : dirfrags) { @@ -1902,6 +1943,8 @@ void CInode::encode_lock_inest(bufferlist& bl) void CInode::decode_lock_inest(bufferlist::const_iterator& p) { + inode_ptr _inode; + DECODE_START(1, p); if (is_auth()) { bool replica_dirty; @@ -1911,13 +1954,14 @@ void CInode::decode_lock_inest(bufferlist::const_iterator& p) nestlock.mark_dirty(); // ok bc we're auth and caller will handle } } else { - decode(inode.version, p); + _inode = allocate_inode(*get_inode()); + decode(_inode->version, p); } nest_info_t rstat; decode(rstat, p); if (!is_auth()) { dout(10) << __func__ << " taking inode rstat " << rstat << " for " << *this << dendl; - inode.rstat = rstat; // take inode summation if replica + _inode->rstat = rstat; // take inode summation if replica } __u32 n; decode(n, p); @@ -1956,109 +2000,122 @@ void CInode::decode_lock_inest(bufferlist::const_iterator& p) dir->first = fgfirst; fnode_t *pf = dir->get_projected_fnode(); finish_scatter_update(&nestlock, dir, - inode.rstat.version, pf->accounted_rstat.version); + _inode->rstat.version, pf->accounted_rstat.version); } } } DECODE_FINISH(p); + + if (_inode) + reset_inode(std::move(_inode)); } void CInode::encode_lock_ixattr(bufferlist& bl) { ENCODE_START(1, 1, bl); - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(xattrs, bl); + encode(get_inode()->version, bl); + encode(get_inode()->ctime, bl); + encode_xattrs(bl); ENCODE_FINISH(bl); } void CInode::decode_lock_ixattr(bufferlist::const_iterator& p) { + ceph_assert(!is_auth()); + auto _inode = allocate_inode(*get_inode()); DECODE_START(1, p); - 
decode(inode.version, p); + decode(_inode->version, p); utime_t tm; decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode_noshare(xattrs, p); + if (_inode->ctime < tm) + _inode->ctime = tm; + decode_xattrs(p); DECODE_FINISH(p); + reset_inode(std::move(_inode)); } void CInode::encode_lock_isnap(bufferlist& bl) { ENCODE_START(1, 1, bl); - encode(inode.version, bl); - encode(inode.ctime, bl); + encode(get_inode()->version, bl); + encode(get_inode()->ctime, bl); encode_snap(bl); ENCODE_FINISH(bl); } void CInode::decode_lock_isnap(bufferlist::const_iterator& p) { + ceph_assert(!is_auth()); + auto _inode = allocate_inode(*get_inode()); DECODE_START(1, p); - decode(inode.version, p); + decode(_inode->version, p); utime_t tm; decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; + if (_inode->ctime < tm) _inode->ctime = tm; decode_snap(p); DECODE_FINISH(p); + reset_inode(std::move(_inode)); } void CInode::encode_lock_iflock(bufferlist& bl) { ENCODE_START(1, 1, bl); - encode(inode.version, bl); + encode(get_inode()->version, bl); _encode_file_locks(bl); ENCODE_FINISH(bl); } void CInode::decode_lock_iflock(bufferlist::const_iterator& p) { + ceph_assert(!is_auth()); + auto _inode = allocate_inode(*get_inode()); DECODE_START(1, p); - decode(inode.version, p); + decode(_inode->version, p); _decode_file_locks(p); DECODE_FINISH(p); + reset_inode(std::move(_inode)); } void CInode::encode_lock_ipolicy(bufferlist& bl) { ENCODE_START(2, 1, bl); - if (inode.is_dir()) { - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features()); - encode(inode.quota, bl); - encode(inode.export_pin, bl); - encode(inode.export_ephemeral_distributed_pin, bl); - encode(inode.export_ephemeral_random_pin, bl); + if (is_dir()) { + encode(get_inode()->version, bl); + encode(get_inode()->ctime, bl); + encode(get_inode()->layout, bl, mdcache->mds->mdsmap->get_up_features()); + encode(get_inode()->quota, bl); + 
encode(get_inode()->export_pin, bl); + encode(get_inode()->export_ephemeral_distributed_pin, bl); + encode(get_inode()->export_ephemeral_random_pin, bl); } ENCODE_FINISH(bl); } void CInode::decode_lock_ipolicy(bufferlist::const_iterator& p) { - DECODE_START(2, p); - if (inode.is_dir()) { - decode(inode.version, p); + ceph_assert(!is_auth()); + auto _inode = allocate_inode(*get_inode()); + DECODE_START(1, p); + if (is_dir()) { + decode(_inode->version, p); utime_t tm; decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.layout, p); - decode(inode.quota, p); - { - mds_rank_t old_pin = inode.export_pin; - decode(inode.export_pin, p); - maybe_export_pin(old_pin != inode.export_pin); - } + if (_inode->ctime < tm) + _inode->ctime = tm; + decode(_inode->layout, p); + decode(_inode->quota, p); + decode(_inode->export_pin, p); if (struct_v >= 2) { - { - bool old_ephemeral_pin = inode.export_ephemeral_distributed_pin; - decode(inode.export_ephemeral_distributed_pin, p); - maybe_ephemeral_dist_children(old_ephemeral_pin != inode.export_ephemeral_distributed_pin); - } - decode(inode.export_ephemeral_random_pin, p); + decode(_inode->export_ephemeral_distributed_pin, p); + decode(_inode->export_ephemeral_random_pin, p); } } DECODE_FINISH(p); + mds_rank_t old_export_pin = get_inode()->export_pin; + bool old_ephemeral_pin = get_inode()->export_ephemeral_distributed_pin; + reset_inode(std::move(_inode)); + maybe_export_pin(old_export_pin != get_inode()->export_pin); + maybe_ephemeral_dist_children(old_ephemeral_pin != get_inode()->export_ephemeral_distributed_pin); } void CInode::encode_lock_state(int type, bufferlist& bl) @@ -2226,7 +2283,7 @@ void CInode::start_scatter(ScatterLock *lock) { dout(10) << __func__ << " " << *lock << " on " << *this << dendl; ceph_assert(is_auth()); - mempool_inode *pi = get_projected_inode(); + const auto& pi = get_projected_inode(); for (const auto &p : dirfrags) { frag_t fg = p.first; @@ -2286,18 +2343,17 @@ void 
CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, MutationRef mut(new MutationImpl()); mut->ls = mdlog->get_current_segment(); - mempool_inode *pi = get_projected_inode(); fnode_t *pf = dir->project_fnode(); std::string_view ename; switch (lock->get_type()) { case CEPH_LOCK_IFILE: - pf->fragstat.version = pi->dirstat.version; + pf->fragstat.version = inode_version; pf->accounted_fragstat = pf->fragstat; ename = "lock ifile accounted scatter stat update"; break; case CEPH_LOCK_INEST: - pf->rstat.version = pi->rstat.version; + pf->rstat.version = inode_version; pf->accounted_rstat = pf->rstat; ename = "lock inest accounted scatter stat update"; @@ -2390,7 +2446,7 @@ void CInode::finish_scatter_gather_update(int type) // adjust summation ceph_assert(is_auth()); - mempool_inode *pi = get_projected_inode(); + auto pi = _get_projected_inode(); bool touched_mtime = false, touched_chattr = false; dout(20) << " orig dirstat " << pi->dirstat << dendl; @@ -2504,7 +2560,7 @@ void CInode::finish_scatter_gather_update(int type) if (const sr_t *srnode = get_projected_srnode(); srnode) rstat.rsnaps = srnode->snaps.size(); - mempool_inode *pi = get_projected_inode(); + auto pi = _get_projected_inode(); dout(20) << " orig rstat " << pi->rstat << dendl; pi->rstat.version++; for (const auto &p : dirfrags) { @@ -2907,27 +2963,32 @@ mds_authority_t CInode::authority() const snapid_t CInode::get_oldest_snap() { snapid_t t = first; - if (!old_inodes.empty()) - t = old_inodes.begin()->second.first; + if (is_any_old_inodes()) + t = get_old_inodes()->begin()->second.first; return std::min(t, oldest_snap); } -CInode::mempool_old_inode& CInode::cow_old_inode(snapid_t follows, bool cow_head) +const CInode::mempool_old_inode& CInode::cow_old_inode(snapid_t follows, bool cow_head) { ceph_assert(follows >= first); - mempool_inode *pi = cow_head ? get_projected_inode() : get_previous_projected_inode(); - mempool_xattr_map *px = cow_head ? 
get_projected_xattrs() : get_previous_projected_xattrs(); + const auto& pi = cow_head ? get_projected_inode() : get_previous_projected_inode(); + const auto& px = cow_head ? get_projected_xattrs() : get_previous_projected_xattrs(); + + auto _old_inodes = allocate_old_inode_map(); + if (old_inodes) + *_old_inodes = *old_inodes; - mempool_old_inode &old = old_inodes[follows]; + mempool_old_inode &old = (*_old_inodes)[follows]; old.first = first; old.inode = *pi; - old.xattrs = *px; + if (px) { + dout(10) << " " << px->size() << " xattrs cowed, " << *px << dendl; + old.xattrs = *px; + } if (first < oldest_snap) oldest_snap = first; - - dout(10) << " " << px->size() << " xattrs cowed, " << *px << dendl; old.inode.trim_client_ranges(follows); @@ -2941,22 +3002,10 @@ CInode::mempool_old_inode& CInode::cow_old_inode(snapid_t follows, bool cow_head << " to [" << old.first << "," << follows << "] on " << *this << dendl; + reset_old_inodes(std::move(_old_inodes)); return old; } -void CInode::split_old_inode(snapid_t snap) -{ - auto it = old_inodes.lower_bound(snap); - ceph_assert(it != old_inodes.end() && it->second.first < snap); - - mempool_old_inode &old = old_inodes[snap - 1]; - old = it->second; - - it->second.first = snap; - dout(10) << __func__ << " " << "[" << old.first << "," << it->first - << "] to [" << snap << "," << it->first << "] on " << *this << dendl; -} - void CInode::pre_cow_old_inode() { snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq(); @@ -2967,11 +3016,11 @@ void CInode::pre_cow_old_inode() bool CInode::has_snap_data(snapid_t snapid) { bool found = snapid >= first && snapid <= last; - if (!found && is_multiversion()) { - auto p = old_inodes.lower_bound(snapid); - if (p != old_inodes.end()) { + if (!found && is_any_old_inodes()) { + auto p = old_inodes->lower_bound(snapid); + if (p != old_inodes->end()) { if (p->second.first > snapid) { - if (p != old_inodes.begin()) + if (p != old_inodes->begin()) --p; } if (p->second.first <= snapid 
&& snapid <= p->first) { @@ -2986,30 +3035,43 @@ void CInode::purge_stale_snap_data(const set& snaps) { dout(10) << __func__ << " " << snaps << dendl; - for (auto it = old_inodes.begin(); it != old_inodes.end(); ) { - const snapid_t &id = it->first; - const auto &s = snaps.lower_bound(it->second.first); + if (!get_old_inodes()) + return; + + std::vector to_remove; + for (auto p : *get_old_inodes()) { + const snapid_t &id = p.first; + const auto &s = snaps.lower_bound(p.second.first); if (s == snaps.end() || *s > id) { - dout(10) << " purging old_inode [" << it->second.first << "," << id << "]" << dendl; - it = old_inodes.erase(it); - } else { - ++it; + dout(10) << " purging old_inode [" << p.second.first << "," << id << "]" << dendl; + to_remove.push_back(id); } } + + if (to_remove.size() == get_old_inodes()->size()) { + reset_old_inodes(old_inode_map_ptr()); + } else if (!to_remove.empty()) { + auto _old_inodes = allocate_old_inode_map(*get_old_inodes()); + for (auto id : to_remove) + _old_inodes->erase(id); + reset_old_inodes(std::move(_old_inodes)); + } } /* * pick/create an old_inode */ -CInode::mempool_old_inode * CInode::pick_old_inode(snapid_t snap) +snapid_t CInode::pick_old_inode(snapid_t snap) const { - auto it = old_inodes.lower_bound(snap); // p is first key >= to snap - if (it != old_inodes.end() && it->second.first <= snap) { - dout(10) << __func__ << " snap " << snap << " -> [" << it->second.first << "," << it->first << "]" << dendl; - return &it->second; + if (is_any_old_inodes()) { + auto it = old_inodes->lower_bound(snap); // p is first key >= to snap + if (it != old_inodes->end() && it->second.first <= snap) { + dout(10) << __func__ << " snap " << snap << " -> [" << it->second.first << "," << it->first << "]" << dendl; + return it->first; + } } dout(10) << __func__ << " snap " << snap << " -> nothing" << dendl; - return NULL; + return 0; } void CInode::open_snaprealm(bool nosplit) @@ -3462,7 +3524,7 @@ int CInode::get_xlocker_mask(client_t 
client) const } int CInode::get_caps_allowed_for_client(Session *session, Capability *cap, - mempool_inode *file_i) const + const mempool_inode *file_i) const { client_t client = session->get_client(); int allowed; @@ -3588,10 +3650,10 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, bool valid = true; // pick a version! - mempool_inode *oi = &inode; - mempool_inode *pi = get_projected_inode(); + const mempool_inode *oi = get_inode().get(); + const mempool_inode *pi = get_projected_inode().get(); - CInode::mempool_xattr_map *pxattrs = nullptr; + const mempool_xattr_map *pxattrs = nullptr; if (snapid != CEPH_NOSNAP) { @@ -3599,11 +3661,11 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, if (!is_auth()) valid = false; - if (is_multiversion()) { - auto it = old_inodes.lower_bound(snapid); - if (it != old_inodes.end()) { + if (is_any_old_inodes()) { + auto it = old_inodes->lower_bound(snapid); + if (it != old_inodes->end()) { if (it->second.first > snapid) { - if (it != old_inodes.begin()) + if (it != old_inodes->begin()) --it; } if (it->second.first <= snapid && snapid <= it->first) { @@ -3611,9 +3673,8 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, << " to old_inode [" << it->second.first << "," << it->first << "]" << " " << it->second.inode.rstat << dendl; - auto &p = it->second; - pi = oi = &p.inode; - pxattrs = &p.xattrs; + pi = oi = &it->second.inode; + pxattrs = &it->second.xattrs; } else { // snapshoted remote dentry can result this dout(0) << __func__ << " old_inode for snapid " << snapid @@ -3671,7 +3732,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, bool plocal = versionlock.get_last_wrlock_client() == client; bool ppolicy = policylock.is_xlocked_by_client(client) || get_loner()==client; - mempool_inode *any_i = (pfile|pauth|plink|pxattr|plocal) ? pi : oi; + const mempool_inode *any_i = (pfile|pauth|plink|pxattr|plocal) ? 
pi : oi; dout(20) << " pfile " << pfile << " pauth " << pauth << " plink " << plink << " pxattr " << pxattr @@ -3680,7 +3741,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, << " valid=" << valid << dendl; // file - mempool_inode *file_i = pfile ? pi:oi; + const mempool_inode *file_i = pfile ? pi:oi; file_layout_t layout; if (is_dir()) { layout = (ppolicy ? pi : oi)->layout; @@ -3689,11 +3750,23 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, } // max_size is min of projected, actual - uint64_t max_size = - std::min(oi->client_ranges.count(client) ? - oi->client_ranges[client].range.last : 0, - pi->client_ranges.count(client) ? - pi->client_ranges[client].range.last : 0); + uint64_t max_size; + { + auto it = oi->client_ranges.find(client); + if (it == oi->client_ranges.end()) { + max_size = 0; + } else { + max_size = it->second.range.last; + if (oi != pi) { + it = pi->client_ranges.find(client); + if (it == pi->client_ranges.end()) { + max_size = 0; + } else { + max_size = std::min(max_size, it->second.range.last); + } + } + } + } // inline data version_t inline_version = 0; @@ -3705,7 +3778,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, (getattr_caps & CEPH_CAP_FILE_RD)) { // client requests inline data inline_version = file_i->inline_data.version; if (file_i->inline_data.length() > 0) - inline_data = file_i->inline_data.get_data(); + file_i->inline_data.get_data(inline_data); } // nest (do same as file... :/) @@ -3715,13 +3788,13 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, } // auth - mempool_inode *auth_i = pauth ? pi:oi; + const mempool_inode *auth_i = pauth ? pi:oi; // link - mempool_inode *link_i = plink ? pi:oi; + const mempool_inode *link_i = plink ? pi:oi; // xattr - mempool_inode *xattr_i = pxattr ? pi:oi; + const mempool_inode *xattr_i = pxattr ? 
pi:oi; using ceph::encode; // xattr @@ -3730,7 +3803,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, (cap && cap->client_xattr_version < xattr_i->xattr_version) || (getattr_caps & CEPH_CAP_XATTR_SHARED)) { // client requests xattrs if (!pxattrs) - pxattrs = pxattr ? get_projected_xattrs() : &xattrs; + pxattrs = pxattr ? get_projected_xattrs().get() : get_xattrs().get(); xattr_version = xattr_i->xattr_version; } else { xattr_version = 0; @@ -3936,7 +4009,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, encode_xattrs(); encode(inline_version, bl); encode(inline_data, bl); - mempool_inode *policy_i = ppolicy ? pi : oi; + const mempool_inode *policy_i = ppolicy ? pi : oi; encode(policy_i->quota, bl); encode(layout.pool_ns, bl); encode(any_i->btime, bl); @@ -3989,7 +4062,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, encode(inline_data, bl); } if (conn->has_feature(CEPH_FEATURE_MDS_QUOTA)) { - mempool_inode *policy_i = ppolicy ? pi : oi; + const mempool_inode *policy_i = ppolicy ? pi : oi; encode(policy_i->quota, bl); } if (conn->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2)) { @@ -4015,9 +4088,9 @@ void CInode::encode_cap_message(const ref_t &m, Capability *cap) bool plink = linklock.is_xlocked_by_client(client); bool pxattr = xattrlock.is_xlocked_by_client(client); - mempool_inode *oi = &inode; - mempool_inode *pi = get_projected_inode(); - mempool_inode *i = (pfile|pauth|plink|pxattr) ? pi : oi; + const mempool_inode *oi = get_inode().get(); + const mempool_inode *pi = get_projected_inode().get(); + const mempool_inode *i = (pfile|pauth|plink|pxattr) ? 
pi : oi; dout(20) << __func__ << " pfile " << pfile << " pauth " << pauth << " plink " << plink << " pxattr " << pxattr @@ -4039,15 +4112,30 @@ void CInode::encode_cap_message(const ref_t &m, Capability *cap) if (cap->client_inline_version < i->inline_data.version) { m->inline_version = cap->client_inline_version = i->inline_data.version; if (i->inline_data.length() > 0) - m->inline_data = i->inline_data.get_data(); + i->inline_data.get_data(m->inline_data); } else { m->inline_version = 0; } // max_size is min of projected, actual. - uint64_t oldms = oi->client_ranges.count(client) ? oi->client_ranges[client].range.last : 0; - uint64_t newms = pi->client_ranges.count(client) ? pi->client_ranges[client].range.last : 0; - m->max_size = std::min(oldms, newms); + { + uint64_t max_size; + auto it = oi->client_ranges.find(client); + if (it == oi->client_ranges.end()) { + max_size = 0; + } else { + max_size = it->second.range.last; + if (oi != pi) { + it = pi->client_ranges.find(client); + if (it == pi->client_ranges.end()) { + max_size = 0; + } else { + max_size = std::min(max_size, it->second.range.last); + } + } + } + m->max_size = max_size; + } i = pauth ? pi:oi; m->head.mode = i->mode; @@ -4059,11 +4147,14 @@ void CInode::encode_cap_message(const ref_t &m, Capability *cap) using ceph::encode; i = pxattr ? pi:oi; - auto ix = pxattr ? get_projected_xattrs() : &xattrs; + const auto& ix = pxattr ? 
get_projected_xattrs() : get_xattrs(); if ((cap->pending() & CEPH_CAP_XATTR_SHARED) && i->xattr_version > cap->client_xattr_version) { dout(10) << " including xattrs v " << i->xattr_version << dendl; - encode(*ix, m->xattrbl); + if (ix) + encode(*ix, m->xattrbl); + else + encode((__u32)0, m->xattrbl); m->head.xattr_version = i->xattr_version; cap->client_xattr_version = i->xattr_version; } @@ -4075,11 +4166,11 @@ void CInode::_encode_base(bufferlist& bl, uint64_t features) { ENCODE_START(1, 1, bl); encode(first, bl); - encode(inode, bl, features); + encode(*get_inode(), bl, features); encode(symlink, bl); encode(dirfragtree, bl); - encode(xattrs, bl); - encode(old_inodes, bl, features); + encode_xattrs(bl); + encode_old_inodes(bl, features); encode(damage_flags, bl); encode_snap(bl); ENCODE_FINISH(bl); @@ -4088,15 +4179,19 @@ void CInode::_decode_base(bufferlist::const_iterator& p) { DECODE_START(1, p); decode(first, p); - decode(inode, p); + { + auto _inode = allocate_inode(); + decode(*_inode, p); + reset_inode(std::move(_inode)); + } { std::string tmp; decode(tmp, p); symlink = std::string_view(tmp); } decode(dirfragtree, p); - decode_noshare(xattrs, p); - decode(old_inodes, p); + decode_xattrs(p); + decode_old_inodes(p); decode(damage_flags, p); decode_snap(p); DECODE_FINISH(p); @@ -4231,7 +4326,7 @@ void CInode::encode_export(bufferlist& bl) // include scatterlock info for any bounding CDirs bufferlist bounding; - if (inode.is_dir()) + if (get_inode()->is_dir()) for (const auto &p : dirfrags) { CDir *dir = p.second; if (dir->state_test(CDir::STATE_EXPORTBOUND)) { @@ -4360,16 +4455,18 @@ void CInode::decode_import(bufferlist::const_iterator& p, void InodeStoreBase::dump(Formatter *f) const { - inode.dump(f); + inode->dump(f); f->dump_string("symlink", symlink); f->open_array_section("xattrs"); - for (const auto& [key, val] : xattrs) { - f->open_object_section("xattr"); - f->dump_string("key", key); - std::string v(val.c_str(), val.length()); - 
f->dump_string("val", v); - f->close_section(); + if (xattrs) { + for (const auto& [key, val] : *xattrs) { + f->open_object_section("xattr"); + f->dump_string("key", key); + std::string v(val.c_str(), val.length()); + f->dump_string("val", v); + f->close_section(); + } } f->close_section(); f->open_object_section("dirfragtree"); @@ -4377,12 +4474,14 @@ void InodeStoreBase::dump(Formatter *f) const f->close_section(); // dirfragtree f->open_array_section("old_inodes"); - for (const auto &p : old_inodes) { - f->open_object_section("old_inode"); - // The key is the last snapid, the first is in the mempool_old_inode - f->dump_int("last", p.first); - p.second.dump(f); - f->close_section(); // old_inode + if (old_inodes) { + for (const auto &p : *old_inodes) { + f->open_object_section("old_inode"); + // The key is the last snapid, the first is in the mempool_old_inode + f->dump_int("last", p.first); + p.second.dump(f); + f->close_section(); // old_inode + } } f->close_section(); // old_inodes @@ -4396,12 +4495,26 @@ void decode_json_obj(mempool::mds_co::string& t, JSONObj *obj){ t = mempool::mds_co::string(std::string_view(obj->get_data())); } -void InodeStoreBase::decode_json(JSONObj *obj){ +void InodeStoreBase::decode_json(JSONObj *obj) +{ + { + auto _inode = allocate_inode(); + _inode->decode_json(obj); + reset_inode(std::move(_inode)); + } - inode.decode_json(obj); JSONDecoder::decode_json("symlink", symlink, obj, true); // JSONDecoder::decode_json("dirfragtree", dirfragtree, obj, true); // cann't decode it now - JSONDecoder::decode_json("xattrs", InodeStoreBase::xattrs, xattrs_cb, obj, true); + // + // + { + mempool_xattr_map tmp; + JSONDecoder::decode_json("xattrs", tmp, xattrs_cb, obj, true); + if (tmp.empty()) + reset_xattrs(xattr_map_ptr()); + else + reset_xattrs(allocate_xattr_map(std::move(tmp))); + } // JSONDecoder::decode_json("old_inodes", old_inodes, InodeStoreBase::old_indoes_cb, obj, true); // cann't decode old_inodes now 
JSONDecoder::decode_json("oldest_snap", oldest_snap.val, obj, true); JSONDecoder::decode_json("damage_flags", damage_flags, obj, true); @@ -4431,7 +4544,7 @@ void InodeStoreBase::old_indoes_cb(InodeStoreBase::mempool_old_inode_map& c, JSO void InodeStore::generate_test_instances(std::list &ls) { InodeStore *populated = new InodeStore; - populated->inode.ino = 0xdeadbeef; + populated->get_inode()->ino = 0xdeadbeef; populated->symlink = "rhubarb"; ls.push_back(populated); } @@ -4439,7 +4552,7 @@ void InodeStore::generate_test_instances(std::list &ls) void InodeStoreBare::generate_test_instances(std::list &ls) { InodeStoreBare *populated = new InodeStoreBare; - populated->inode.ino = 0xdeadbeef; + populated->get_inode()->ino = 0xdeadbeef; populated->symlink = "rhubarb"; ls.push_back(populated); } @@ -4514,10 +4627,10 @@ void CInode::validate_disk_state(CInode::validated_data *results, bool _start(int rval) { if (in->is_dirty()) { - MDCache *mdcache = in->mdcache; - mempool_inode& inode = in->inode; + MDCache *mdcache = in->mdcache; // For the benefit of dout + auto ino = [this]() { return in->ino(); }; // For the benefit of dout dout(20) << "validating a dirty CInode; results will be inconclusive" - << dendl; + << dendl; } if (in->is_symlink()) { // there's nothing to do for symlinks! 
@@ -4552,7 +4665,7 @@ void CInode::validate_disk_state(CInode::validated_data *results, int memory_newer; MDCache *mdcache = in->mdcache; // For the benefit of dout - const mempool_inode& inode = in->inode; // For the benefit of dout + auto ino = [this]() { return in->ino(); }; // For the benefit of dout // Ignore rval because it's the result of a FAILOK operation // from fetch_backtrace_and_tag: the real result is in @@ -4621,18 +4734,18 @@ next: { InoTable *inotable = mdcache->mds->inotable; - dout(10) << "scrub: inotable ino = " << inode.ino << dendl; + dout(10) << "scrub: inotable ino = " << in->ino() << dendl; dout(10) << "scrub: inotable free says " - << inotable->is_marked_free(inode.ino) << dendl; + << inotable->is_marked_free(in->ino()) << dendl; - if (inotable->is_marked_free(inode.ino)) { + if (inotable->is_marked_free(in->ino())) { LogChannelRef clog = in->mdcache->mds->clog; - clog->error() << "scrub: inode wrongly marked free: " << inode.ino; + clog->error() << "scrub: inode wrongly marked free: " << in->ino(); if (in->scrub_infop->header->get_repair()) { - bool repaired = inotable->repair(inode.ino); + bool repaired = inotable->repair(in->ino()); if (repaired) { - clog->error() << "inode table repaired for inode: " << inode.ino; + clog->error() << "inode table repaired for inode: " << in->ino(); inotable->save(); } else { @@ -4658,7 +4771,7 @@ next: if (in->is_base()) { if (!shadow_in) { shadow_in = new CInode(in->mdcache); - in->mdcache->create_unlinked_system_inode(shadow_in, in->inode.ino, in->inode.mode); + in->mdcache->create_unlinked_system_inode(shadow_in, in->ino(), in->get_inode()->mode); in->mdcache->num_shadow_inodes++; } shadow_in->fetch(get_internal_callback(INODE)); @@ -4671,20 +4784,21 @@ next: } bool _inode_disk(int rval) { + const auto& si = shadow_in->get_inode(); + const auto& i = in->get_inode(); + results->inode.checked = true; results->inode.ondisk_read_retval = rval; - results->inode.ondisk_value = shadow_in->inode; - 
results->inode.memory_value = in->inode; + results->inode.ondisk_value = *si; + results->inode.memory_value = *i; - mempool_inode& si = shadow_in->inode; - mempool_inode& i = in->inode; - if (si.version > i.version) { + if (si->version > i->version) { // uh, what? results->inode.error_str << "On-disk inode is newer than in-memory one; "; goto next; } else { bool divergent = false; - int r = i.compare(si, &divergent); + int r = i->compare(*si, &divergent); results->inode.passed = !divergent && r >= 0; if (!results->inode.passed) { results->inode.error_str << @@ -4727,8 +4841,8 @@ next: results->raw_stats.checked = true; results->raw_stats.ondisk_read_retval = rval; - results->raw_stats.memory_value.dirstat = in->inode.dirstat; - results->raw_stats.memory_value.rstat = in->inode.rstat; + results->raw_stats.memory_value.dirstat = in->get_inode()->dirstat; + results->raw_stats.memory_value.rstat = in->get_inode()->rstat; frag_info_t& dir_info = results->raw_stats.ondisk_value.dirstat; nest_info_t& nest_info = results->raw_stats.ondisk_value.rstat; @@ -4761,8 +4875,8 @@ next: nest_info.rsnaps += srnode->snaps.size(); // ...and that their sum matches our inode settings - if (!dir_info.same_sums(in->inode.dirstat) || - !nest_info.same_sums(in->inode.rstat)) { + if (!dir_info.same_sums(in->get_inode()->dirstat) || + !nest_info.same_sums(in->get_inode()->rstat)) { if (in->scrub_infop->header->get_repair()) { results->raw_stats.error_str << "freshly-calculated rstats don't match existing ones (will be fixed)"; @@ -5034,11 +5148,11 @@ void CInode::scrub_info_create() const // break out of const-land to set up implicit initial state CInode *me = const_cast(this); - mempool_inode *in = me->get_projected_inode(); + const auto& pi = me->get_projected_inode(); scrub_info_t *si = new scrub_info_t(); - si->scrub_start_stamp = si->last_scrub_stamp = in->last_scrub_stamp; - si->scrub_start_version = si->last_scrub_version = in->last_scrub_version; + si->scrub_start_stamp = 
si->last_scrub_stamp = pi->last_scrub_stamp; + si->scrub_start_version = si->last_scrub_version = pi->last_scrub_version; me->scrub_infop = si; } @@ -5222,8 +5336,8 @@ int64_t CInode::get_backtrace_pool() const } else { // Files are required to have an explicit layout that specifies // a pool - ceph_assert(inode.layout.pool_id != -1); - return inode.layout.pool_id; + ceph_assert(get_inode()->layout.pool_id != -1); + return get_inode()->layout.pool_id; } } @@ -5309,7 +5423,7 @@ void CInode::maybe_ephemeral_dist(bool update) dout(15) << __func__ << " !dir or !normal: cannot ephemeral distributed pin " << *this << dendl; set_ephemeral_dist(false); return; - } else if (get_inode().nlink == 0) { + } else if (get_inode()->nlink == 0) { dout(15) << __func__ << " unlinked directory: cannot ephemeral distributed pin " << *this << dendl; set_ephemeral_dist(false); return; @@ -5326,7 +5440,7 @@ void CInode::maybe_ephemeral_dist(bool update) return; } - bool pin = dir->get_inode()->get_inode().export_ephemeral_distributed_pin; + bool pin = dir->get_inode()->get_inode()->export_ephemeral_distributed_pin; if (pin) { dout(10) << __func__ << " ephemeral distributed pinning " << *this << dendl; set_ephemeral_dist(true); @@ -5345,12 +5459,12 @@ void CInode::maybe_ephemeral_dist_children(bool update) } else if (!is_dir() || !is_normal()) { dout(15) << __func__ << " !dir or !normal: cannot ephemeral distributed pin " << *this << dendl; return; - } else if (get_inode().nlink == 0) { + } else if (get_inode()->nlink == 0) { dout(15) << __func__ << " unlinked directory: cannot ephemeral distributed pin " << *this << dendl; return; } - bool pin = get_inode().export_ephemeral_distributed_pin; + bool pin = get_inode()->export_ephemeral_distributed_pin; /* FIXME: expensive to iterate children when not updating */ if (!pin && !update) { return; @@ -5398,7 +5512,7 @@ void CInode::maybe_ephemeral_rand(bool fresh, double threshold) dout(15) << __func__ << " !dir or !normal: cannot ephemeral 
random pin " << *this << dendl; set_ephemeral_rand(false); return; - } else if (get_inode().nlink == 0) { + } else if (get_inode()->nlink == 0) { dout(15) << __func__ << " unlinked directory: cannot ephemeral random pin " << *this << dendl; set_ephemeral_rand(false); return; @@ -5432,22 +5546,20 @@ void CInode::maybe_ephemeral_rand(bool fresh, double threshold) void CInode::setxattr_ephemeral_rand(double probability) { ceph_assert(is_dir()); - ceph_assert(is_projected()); - get_projected_inode()->export_ephemeral_random_pin = probability; + _get_projected_inode()->export_ephemeral_random_pin = probability; } void CInode::setxattr_ephemeral_dist(bool val) { ceph_assert(is_dir()); - ceph_assert(is_projected()); - get_projected_inode()->export_ephemeral_distributed_pin = val; + _get_projected_inode()->export_ephemeral_distributed_pin = val; } void CInode::set_export_pin(mds_rank_t rank) { ceph_assert(is_dir()); - ceph_assert(is_projected()); - get_projected_inode()->export_pin = rank; + _get_projected_inode()->export_pin = rank; + maybe_export_pin(true); } void CInode::check_pin_policy() @@ -5460,12 +5572,12 @@ void CInode::check_pin_policy() const CDentry *pdn = in->get_parent_dn(); if (!pdn) break; - if (in->get_inode().nlink == 0) { + if (in->get_inode()->nlink == 0) { // ignore export pin for unlinked directory return; } else if (etarget != MDS_RANK_NONE && in->has_ephemeral_policy()) { return; - } else if (in->get_inode().export_pin >= 0) { + } else if (in->get_inode()->export_pin >= 0) { /* clear any epin policy */ set_ephemeral_dist(false); set_ephemeral_rand(false); @@ -5493,13 +5605,13 @@ mds_rank_t CInode::get_export_pin(bool inherit, bool ephemeral) const const CDentry *pdn = in->get_parent_dn(); if (!pdn) break; - if (in->get_inode().nlink == 0) { + if (in->get_inode()->nlink == 0) { // ignore export pin for unlinked directory return MDS_RANK_NONE; } else if (etarget != MDS_RANK_NONE && in->has_ephemeral_policy()) { return etarget; - } else if 
(in->get_inode().export_pin >= 0) { - return in->get_inode().export_pin; + } else if (in->get_inode()->export_pin >= 0) { + return in->get_inode()->export_pin; } else if (etarget == MDS_RANK_NONE && ephemeral && in->is_ephemerally_pinned()) { /* If a parent overrides a grandparent ephemeral pin policy with an export pin, we use that export pin instead. */ etarget = mdcache->hash_into_rank_bucket(in->ino()); @@ -5528,16 +5640,16 @@ double CInode::get_ephemeral_rand(bool inherit) const if (!pdn) break; // ignore export pin for unlinked directory - if (in->get_inode().nlink == 0) + if (in->get_inode()->nlink == 0) break; - if (in->get_inode().export_ephemeral_random_pin > 0.0) - return std::min(in->get_inode().export_ephemeral_random_pin, max); + if (in->get_inode()->export_ephemeral_random_pin > 0.0) + return std::min(in->get_inode()->export_ephemeral_random_pin, max); /* An export_pin overrides only if no closer parent (incl. this one) has a * random pin set. */ - if (in->get_inode().export_pin >= 0) + if (in->get_inode()->export_pin >= 0) return 0.0; if (!inherit) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 1e055cf99f9db..c9d014067370d 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -21,6 +21,7 @@ #include #include "common/config.h" +#include "common/RefCountedObj.h" #include "include/counter.h" #include "include/elist.h" #include "include/types.h" @@ -67,17 +68,55 @@ struct cinode_lock_info_t { */ class InodeStoreBase { public: - typedef inode_t mempool_inode; - typedef old_inode_t mempool_old_inode; - typedef mempool::mds_co::compact_map mempool_old_inode_map; - typedef xattr_map mempool_xattr_map; // FIXME bufferptr not in mempool + using mempool_inode = inode_t; + using inode_ptr = std::shared_ptr; + using inode_const_ptr = std::shared_ptr; + + template + static inode_ptr allocate_inode(Args && ...args) { + static mempool::mds_co::pool_allocator allocator; + return std::allocate_shared(allocator, std::forward(args)...); + } + + using 
mempool_xattr_map = xattr_map; // FIXME bufferptr not in mempool + using xattr_map_ptr = std::shared_ptr; + using xattr_map_const_ptr = std::shared_ptr; + + template + static xattr_map_ptr allocate_xattr_map(Args && ...args) { + static mempool::mds_co::pool_allocator allocator; + return std::allocate_shared(allocator, std::forward(args)...); + } + + using mempool_old_inode = old_inode_t; + using mempool_old_inode_map = mempool::mds_co::map; + using old_inode_map_ptr = std::shared_ptr; + using old_inode_map_const_ptr = std::shared_ptr; + + template + static old_inode_map_ptr allocate_old_inode_map(Args && ...args) { + static mempool::mds_co::pool_allocator allocator; + return std::allocate_shared(allocator, std::forward(args)...); + } - InodeStoreBase() {} + void reset_inode(inode_const_ptr&& ptr) { + inode = std::move(ptr); + } + + void reset_xattrs(xattr_map_const_ptr&& ptr) { + xattrs = std::move(ptr); + } + + void reset_old_inodes(old_inode_map_const_ptr&& ptr) { + old_inodes = std::move(ptr); + } + + void encode_xattrs(bufferlist &bl) const; + void decode_xattrs(bufferlist::const_iterator &p); + void encode_old_inodes(bufferlist &bl, uint64_t features) const; + void decode_old_inodes(bufferlist::const_iterator &p); /* Helpers */ - bool is_file() const { return inode.is_file(); } - bool is_symlink() const { return inode.is_symlink(); } - bool is_dir() const { return inode.is_dir(); } static object_t get_object_name(inodeno_t ino, frag_t fg, std::string_view suffix); /* Full serialization for use in ".inode" root inode objects */ @@ -99,13 +138,20 @@ public: __u32 hash_dentry_name(std::string_view dn); frag_t pick_dirfrag(std::string_view dn); - mempool_inode inode; // the inode itself - mempool::mds_co::string symlink; // symlink dest, if symlink - mempool_xattr_map xattrs; - fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map. 
- mempool_old_inode_map old_inodes; // key = last, value.first = first - snapid_t oldest_snap = CEPH_NOSNAP; - damage_flags_t damage_flags = 0; + mempool::mds_co::string symlink; // symlink dest, if symlink + fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map. + snapid_t oldest_snap = CEPH_NOSNAP; + damage_flags_t damage_flags = 0; + +protected: + static inode_const_ptr empty_inode; + + // Following members are pointers to constant data, the constant data can + // be shared by CInode and log events. To update these members in CInode, + // read-copy-update should be used. + inode_const_ptr inode = empty_inode; + xattr_map_const_ptr xattrs; + old_inode_map_const_ptr old_inodes; // key = last, value.first = first }; inline void decode_noshare(InodeStoreBase::mempool_xattr_map& xattrs, @@ -116,6 +162,13 @@ inline void decode_noshare(InodeStoreBase::mempool_xattr_map& xattrs, class InodeStore : public InodeStoreBase { public: + mempool_inode* get_inode() { + if (inode == empty_inode) + reset_inode(allocate_inode()); + return const_cast(inode.get()); + } + mempool_xattr_map* get_xattrs() { return const_cast(xattrs.get()); } + void encode(ceph::buffer::list &bl, uint64_t features) const { InodeStoreBase::encode(bl, features, &snap_blob); } @@ -131,7 +184,11 @@ public: static void generate_test_instances(std::list& ls); - // FIXME ceph::buffer::list not part of mempool + using InodeStoreBase::inode; + using InodeStoreBase::xattrs; + using InodeStoreBase::old_inodes; + + // FIXME bufferlist not part of mempool ceph::buffer::list snap_blob; // Encoded copy of SnapRealm, because we can't // rehydrate it without full MDCache }; @@ -243,32 +300,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter xattrs; - sr_t *snapnode = UNDEF_SRNODE; - }; - // -- pins -- static const int PIN_DIRFRAG = -1; static const int PIN_CAPS = 2; // client caps @@ -368,7 +399,6 @@ class CInode : public MDSCacheObject, public 
InodeStoreBase, public Counter 1 || // there are remote links, possibly snapped, that will need to find me - !old_inodes.empty(); // once multiversion, always multiversion. until old_inodes gets cleaned out. + get_inode()->is_dir() || // links to me in other snaps + get_inode()->nlink > 1 || // there are remote links, possibly snapped, that will need to find me + is_any_old_inodes(); // once multiversion, always multiversion. until old_inodes gets cleaned out. } snapid_t get_oldest_snap(); @@ -469,51 +499,85 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counterversion; else - return projected_nodes.back().inode.version; + return projected_nodes.back().inode->version; } bool is_projected() const { return !projected_nodes.empty(); } - const mempool_inode *get_projected_inode() const { + const inode_const_ptr& get_projected_inode() const { if (projected_nodes.empty()) - return &inode; + return get_inode(); else - return &projected_nodes.back().inode; + return projected_nodes.back().inode; + } + // inode should have already been projected in caller's context + mempool_inode* _get_projected_inode() { + ceph_assert(!projected_nodes.empty()); + return const_cast(projected_nodes.back().inode.get()); } - mempool_inode *get_projected_inode() { + const inode_const_ptr& get_previous_projected_inode() const { + ceph_assert(!projected_nodes.empty()); + auto it = projected_nodes.rbegin(); + ++it; + if (it != projected_nodes.rend()) + return it->inode; + else + return get_inode(); + } + + const xattr_map_const_ptr& get_projected_xattrs() { if (projected_nodes.empty()) - return &inode; + return xattrs; else - return &projected_nodes.back().inode; + return projected_nodes.back().xattrs; } - mempool_inode *get_previous_projected_inode() { + const xattr_map_const_ptr& get_previous_projected_xattrs() { ceph_assert(!projected_nodes.empty()); auto it = projected_nodes.rbegin(); ++it; if (it != projected_nodes.rend()) - return &it->inode; + return it->xattrs; else 
- return &inode; + return xattrs; } - mempool_xattr_map *get_projected_xattrs(); - mempool_xattr_map *get_previous_projected_xattrs(); - sr_t *prepare_new_srnode(snapid_t snapid); void project_snaprealm(sr_t *new_srnode); sr_t *project_snaprealm(snapid_t snapid=0) { @@ -533,9 +597,9 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter& snaps); @@ -598,15 +662,22 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter split_need_snapflush(CInode *cowin, CInode *in); // -- accessors -- - bool is_root() const { return inode.ino == MDS_INO_ROOT; } - bool is_stray() const { return MDS_INO_IS_STRAY(inode.ino); } + + inodeno_t ino() const { return get_inode()->ino; } + vinodeno_t vino() const { return vinodeno_t(ino(), last); } + int d_type() const { return IFTODT(get_inode()->mode); } + bool is_root() const { return ino() == MDS_INO_ROOT; } + bool is_stray() const { return MDS_INO_IS_STRAY(ino()); } mds_rank_t get_stray_owner() const { - return (mds_rank_t)MDS_INO_STRAY_OWNER(inode.ino); + return (mds_rank_t)MDS_INO_STRAY_OWNER(ino()); } - bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(inode.ino); } - bool is_base() const { return MDS_INO_IS_BASE(inode.ino); } - bool is_system() const { return inode.ino < MDS_INO_SYSTEM_BASE; } + bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(ino()); } + bool is_base() const { return MDS_INO_IS_BASE(ino()); } + bool is_system() const { return ino() < MDS_INO_SYSTEM_BASE; } bool is_normal() const { return !(is_base() || is_system() || is_stray()); } + bool is_file() const { return get_inode()->is_file(); } + bool is_symlink() const { return get_inode()->is_symlink(); } + bool is_dir() const { return get_inode()->is_dir(); } bool is_head() const { return last == CEPH_NOSNAP; } @@ -621,12 +692,22 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter(inode.get()); + } + + const xattr_map_const_ptr& get_xattrs() const { return xattrs; } + + bool 
is_any_old_inodes() const { return old_inodes && !old_inodes->empty(); } + const old_inode_map_const_ptr& get_old_inodes() const { return old_inodes; } - mempool_inode& get_inode() { return inode; } - const mempool_inode& get_inode() const { return inode; } CDentry* get_parent_dn() { return parent; } const CDentry* get_parent_dn() const { return parent; } CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; } @@ -656,11 +737,11 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counterversion; } version_t pre_dirty(); void _mark_dirty(LogSegment *ls); - void mark_dirty(version_t projected_dirv, LogSegment *ls); + void mark_dirty(LogSegment *ls); void mark_clean(); void store(MDSContext *fin); @@ -837,7 +918,8 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter 0.0 || - get_inode().export_ephemeral_distributed_pin; + return get_inode()->export_ephemeral_random_pin > 0.0 || + get_inode()->export_ephemeral_distributed_pin; } bool is_ephemerally_pinned() const { return state_test(STATE_DISTEPHEMERALPIN) || @@ -1152,8 +1234,18 @@ private: bool _validate_disk_state(class ValidationContinuation *c, int rval, int stage); - mempool::mds_co::list projected_nodes; // projected values (only defined while dirty) - size_t num_projected_xattrs = 0; + struct projected_const_node { + inode_const_ptr inode; + xattr_map_const_ptr xattrs; + sr_t *snapnode; + + projected_const_node() = delete; + projected_const_node(projected_const_node&&) = default; + explicit projected_const_node(const inode_const_ptr& i, const xattr_map_const_ptr& x, sr_t *s) : + inode(i), xattrs(x), snapnode(s) {} + }; + + mempool::mds_co::list projected_nodes; // projected values (only defined while dirty) size_t num_projected_srnodes = 0; // -- cache infrastructure -- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 4dbc1bd37a08a..9bd7df5eb74d9 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ 
-2106,7 +2106,7 @@ void Locker::xlock_downgrade(SimpleLock *lock, MutationImpl *mut) version_t Locker::issue_file_data_version(CInode *in) { dout(7) << "issue_file_data_version on " << *in << dendl; - return in->inode.file_data_version; + return in->get_inode()->file_data_version; } class C_Locker_FileUpdate_finish : public LockerLogContext { @@ -2316,9 +2316,9 @@ int Locker::issue_caps(CInode *in, Capability *only_cap) allowed |= cap->get_lock_cache_allowed(); } - if ((in->inode.inline_data.version != CEPH_INLINE_NONE && + if ((in->get_inode()->inline_data.version != CEPH_INLINE_NONE && cap->is_noinline()) || - (!in->inode.layout.pool_ns.empty() && + (!in->get_inode()->layout.pool_ns.empty() && cap->is_nopoolns())) allowed &= ~(CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR); @@ -2367,7 +2367,7 @@ int Locker::issue_caps(CInode *in, Capability *only_cap) } // notify clients about deleted inode, to make sure they release caps ASAP. - if (in->inode.nlink == 0) + if (in->get_inode()->nlink == 0) wanted |= CEPH_CAP_LINK_SHARED; // are there caps that the client _wants_ and can have, but aren't pending? 
@@ -2455,7 +2455,7 @@ void Locker::revoke_stale_cap(CInode *in, client_t client) cap->revoke(); - if (in->is_auth() && in->inode.client_ranges.count(cap->get_client())) + if (in->is_auth() && in->get_inode()->client_ranges.count(cap->get_client())) in->state_set(CInode::STATE_NEEDSRECOVER); if (in->state_test(CInode::STATE_EXPORTINGCAPS)) @@ -2513,7 +2513,7 @@ bool Locker::revoke_stale_caps(Session *session) eval_lock_caches(cap); if (in->is_auth() && - in->inode.client_ranges.count(cap->get_client())) + in->get_inode()->client_ranges.count(cap->get_client())) in->state_set(CInode::STATE_NEEDSRECOVER); // eval lock/inode may finish contexts, which may modify other cap's position @@ -2676,7 +2676,7 @@ public: } }; -uint64_t Locker::calc_new_max_size(CInode::mempool_inode *pi, uint64_t size) +uint64_t Locker::calc_new_max_size(const CInode::inode_const_ptr &pi, uint64_t size) { uint64_t new_max = (size + 1) << 1; uint64_t max_inc = g_conf()->mds_client_writeable_range_max_inc_objs; @@ -2691,7 +2691,7 @@ void Locker::calc_new_client_ranges(CInode *in, uint64_t size, bool update, CInode::mempool_inode::client_range_map *new_ranges, bool *max_increased) { - auto latest = in->get_projected_inode(); + const auto& latest = in->get_projected_inode(); uint64_t ms; if (latest->has_layout()) { ms = calc_new_max_size(latest, size); @@ -2706,8 +2706,9 @@ void Locker::calc_new_client_ranges(CInode *in, uint64_t size, bool update, if ((p.second.issued() | p.second.wanted()) & CEPH_CAP_ANY_FILE_WR) { client_writeable_range_t& nr = (*new_ranges)[p.first]; nr.range.first = 0; - if (latest->client_ranges.count(p.first)) { - client_writeable_range_t& oldr = latest->client_ranges[p.first]; + auto it = latest->client_ranges.find(p.first); + if (it != latest->client_ranges.end()) { + const client_writeable_range_t& oldr = it->second; if (ms > oldr.range.last) *max_increased = true; nr.range.last = std::max(ms, oldr.range.last); @@ -2733,7 +2734,7 @@ bool 
Locker::check_inode_max_size(CInode *in, bool force_wrlock, ceph_assert(in->is_auth()); ceph_assert(in->is_file()); - CInode::mempool_inode *latest = in->get_projected_inode(); + const auto& latest = in->get_projected_inode(); CInode::mempool_inode::client_range_map new_ranges; uint64_t size = latest->size; bool update_size = new_size > 0; @@ -2792,24 +2793,24 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); - auto &pi = in->project_inode(); - pi.inode.version = in->pre_dirty(); + auto pi = in->project_inode(); + pi.inode->version = in->pre_dirty(); if (update_max) { - dout(10) << "check_inode_max_size client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl; - pi.inode.client_ranges = new_ranges; + dout(10) << "check_inode_max_size client_ranges " << pi.inode->client_ranges << " -> " << new_ranges << dendl; + pi.inode->client_ranges = new_ranges; } if (update_size) { - dout(10) << "check_inode_max_size size " << pi.inode.size << " -> " << new_size << dendl; - pi.inode.size = new_size; - pi.inode.rstat.rbytes = new_size; - dout(10) << "check_inode_max_size mtime " << pi.inode.mtime << " -> " << new_mtime << dendl; - pi.inode.mtime = new_mtime; - if (new_mtime > pi.inode.ctime) { - pi.inode.ctime = new_mtime; - if (new_mtime > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = new_mtime; + dout(10) << "check_inode_max_size size " << pi.inode->size << " -> " << new_size << dendl; + pi.inode->size = new_size; + pi.inode->rstat.rbytes = new_size; + dout(10) << "check_inode_max_size mtime " << pi.inode->mtime << " -> " << new_mtime << dendl; + pi.inode->mtime = new_mtime; + if (new_mtime > pi.inode->ctime) { + pi.inode->ctime = new_mtime; + if (new_mtime > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = new_mtime; } } @@ -3519,25 +3520,32 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll m->xattrbl.length() && 
m->head.xattr_version > in->get_projected_inode()->xattr_version; - CInode::mempool_old_inode *oi = 0; - if (in->is_multiversion()) { - oi = in->pick_old_inode(snap); + CInode::mempool_old_inode *oi = nullptr; + CInode::old_inode_map_ptr _old_inodes; + if (in->is_any_old_inodes()) { + auto last = in->pick_old_inode(snap); + if (last) { + _old_inodes = CInode::allocate_old_inode_map(*in->get_old_inodes()); + oi = &_old_inodes->at(last); + if (snap > oi->first) { + (*_old_inodes)[snap - 1] = *oi;; + oi->first = snap; + } + } } CInode::mempool_inode *i; if (oi) { dout(10) << " writing into old inode" << dendl; - auto &pi = in->project_inode(); - pi.inode.version = in->pre_dirty(); - if (snap > oi->first) - in->split_old_inode(snap); + auto pi = in->project_inode(); + pi.inode->version = in->pre_dirty(); i = &oi->inode; if (xattrs) px = &oi->xattrs; } else { - auto &pi = in->project_inode(xattrs); - pi.inode.version = in->pre_dirty(); - i = &pi.inode; + auto pi = in->project_inode(xattrs); + pi.inode->version = in->pre_dirty(); + i = pi.inode.get(); if (xattrs) px = pi.xattrs.get(); } @@ -3566,6 +3574,9 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll } } + if (_old_inodes) + in->reset_old_inodes(std::move(_old_inodes)); + mut->auth_pin(in); mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, 0, follows); mdcache->journal_dirty_inode(mut.get(), &le->metablob, in, follows); @@ -3618,19 +3629,19 @@ void Locker::_update_cap_fields(CInode *in, int dirty, const cref_t if (mtime > pi->rstat.rctime) pi->rstat.rctime = mtime; } - if (in->inode.is_file() && // ONLY if regular file + if (in->is_file() && // ONLY if regular file size > pi->size) { dout(7) << " size " << pi->size << " -> " << size << " for " << *in << dendl; pi->size = size; pi->rstat.rbytes = size; } - if (in->inode.is_file() && + if (in->is_file() && (dirty & CEPH_CAP_FILE_WR) && inline_version > pi->inline_data.version) { pi->inline_data.version = 
inline_version; if (inline_version != CEPH_INLINE_NONE && m->inline_data.length() > 0) - pi->inline_data.get_data() = m->inline_data; + pi->inline_data.set_data(m->inline_data); else pi->inline_data.free_data(); } @@ -3691,12 +3702,16 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, << " on " << *in << dendl; ceph_assert(in->is_auth()); client_t client = m->get_source().num(); - CInode::mempool_inode *latest = in->get_projected_inode(); + const auto& latest = in->get_projected_inode(); // increase or zero max_size? uint64_t size = m->get_size(); bool change_max = false; - uint64_t old_max = latest->client_ranges.count(client) ? latest->client_ranges[client].range.last : 0; + uint64_t old_max; + { + auto it = latest->client_ranges.find(client); + old_max = it != latest->client_ranges.end() ? it->second.range.last: 0; + } uint64_t new_max = old_max; if (in->is_file()) { @@ -3797,26 +3812,26 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, m->xattrbl.length() && m->head.xattr_version > in->get_projected_inode()->xattr_version; - auto &pi = in->project_inode(xattr); - pi.inode.version = in->pre_dirty(); + auto pi = in->project_inode(xattr); + pi.inode->version = in->pre_dirty(); MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); - _update_cap_fields(in, dirty, m, &pi.inode); + _update_cap_fields(in, dirty, m, pi.inode.get()); if (change_max) { dout(7) << " max_size " << old_max << " -> " << new_max << " for " << *in << dendl; if (new_max) { - auto &cr = pi.inode.client_ranges[client]; + auto &cr = pi.inode->client_ranges[client]; cr.range.first = 0; cr.range.last = new_max; cr.follows = in->first - 1; if (cap) cap->mark_clientwriteable(); } else { - pi.inode.client_ranges.erase(client); + pi.inode->client_ranges.erase(client); if (cap) cap->clear_clientwriteable(); } @@ -3831,8 +3846,8 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, // xattrs update? 
if (xattr) { - dout(7) << " xattrs v" << pi.inode.xattr_version << " -> " << m->head.xattr_version << dendl; - pi.inode.xattr_version = m->head.xattr_version; + dout(7) << " xattrs v" << pi.inode->xattr_version << " -> " << m->head.xattr_version << dendl; + pi.inode->xattr_version = m->head.xattr_version; auto p = m->xattrbl.cbegin(); decode_noshare(*pi.xattrs, p); wrlock_force(&in->xattrlock, mut); @@ -3968,7 +3983,7 @@ void Locker::remove_client_cap(CInode *in, Capability *cap, bool kill) if (in->is_auth()) { // make sure we clear out the client byte range if (in->get_projected_inode()->client_ranges.count(client) && - !(in->inode.nlink == 0 && !in->is_any_caps())) { // unless it's unlink + stray + !(in->get_inode()->nlink == 0 && !in->is_any_caps())) { // unless it's unlink + stray if (kill) in->state_set(CInode::STATE_NEEDSRECOVER); else @@ -4852,7 +4867,7 @@ public: void Locker::scatter_writebehind(ScatterLock *lock) { CInode *in = static_cast(lock->get_parent()); - dout(10) << "scatter_writebehind " << in->inode.mtime << " on " << *lock << " on " << *in << dendl; + dout(10) << "scatter_writebehind " << in->get_inode()->mtime << " on " << *lock << " on " << *in << dendl; // journal MutationRef mut(new MutationImpl()); @@ -4864,8 +4879,8 @@ void Locker::scatter_writebehind(ScatterLock *lock) in->pre_cow_old_inode(); // avoid cow mayhem - auto &pi = in->project_inode(); - pi.inode.version = in->pre_dirty(); + auto pi = in->project_inode(); + pi.inode->version = in->pre_dirty(); in->finish_scatter_gather_update(lock->get_type()); lock->start_flush(); diff --git a/src/mds/Locker.h b/src/mds/Locker.h index e7b0688aa5477..18f173c21479b 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -259,7 +259,7 @@ private: friend class LockerLogContext; bool any_late_revoking_caps(xlist const &revoking, double timeout) const; - uint64_t calc_new_max_size(CInode::mempool_inode *pi, uint64_t size); + uint64_t calc_new_max_size(const CInode::inode_const_ptr& pi, uint64_t 
size); MDSRank *mds; MDCache *mdcache; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 6bbe7e8638eb3..f06d585266d43 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -272,9 +272,9 @@ void MDCache::log_stat() mds->logger->set(l_mds_inodes_with_caps, num_inodes_with_caps); mds->logger->set(l_mds_caps, Capability::count()); if (root) { - mds->logger->set(l_mds_root_rfiles, root->inode.rstat.rfiles); - mds->logger->set(l_mds_root_rbytes, root->inode.rstat.rbytes); - mds->logger->set(l_mds_root_rsnaps, root->inode.rstat.rsnaps); + mds->logger->set(l_mds_root_rfiles, root->get_inode()->rstat.rfiles); + mds->logger->set(l_mds_root_rbytes, root->get_inode()->rstat.rbytes); + mds->logger->set(l_mds_root_rsnaps, root->get_inode()->rstat.rsnaps); } } @@ -413,33 +413,31 @@ void MDCache::init_layouts() default_log_layout = gen_default_log_layout(*(mds->mdsmap)); } -void MDCache::create_unlinked_system_inode(CInode *in, inodeno_t ino, - int mode) const +void MDCache::create_unlinked_system_inode(CInode *in, inodeno_t ino, int mode) const { - in->inode.ino = ino; - in->inode.version = 1; - in->inode.xattr_version = 1; - in->inode.mode = 0500 | mode; - in->inode.size = 0; - in->inode.ctime = - in->inode.mtime = - in->inode.btime = ceph_clock_now(); - in->inode.nlink = 1; - in->inode.truncate_size = -1ull; - in->inode.change_attr = 0; - in->inode.export_pin = MDS_RANK_NONE; + auto _inode = in->_get_inode(); + _inode->ino = ino; + _inode->version = 1; + _inode->xattr_version = 1; + _inode->mode = 0500 | mode; + _inode->size = 0; + _inode->ctime = _inode->mtime = _inode->btime = ceph_clock_now(); + _inode->nlink = 1; + _inode->truncate_size = -1ull; + _inode->change_attr = 0; + _inode->export_pin = MDS_RANK_NONE; // FIPS zeroization audit 20191117: this memset is not security related. 
- memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout)); - if (in->inode.is_dir()) { - in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; - in->inode.rstat.rsubdirs = 1; /* itself */ - in->inode.rstat.rctime = in->inode.ctime; + memset(&_inode->dir_layout, 0, sizeof(_inode->dir_layout)); + if (_inode->is_dir()) { + _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + _inode->rstat.rsubdirs = 1; /* itself */ + _inode->rstat.rctime = in->get_inode()->ctime; } else { - in->inode.layout = default_file_layout; - ++in->inode.rstat.rfiles; + _inode->layout = default_file_layout; + ++_inode->rstat.rfiles; } - in->inode.accounted_rstat = in->inode.rstat; + _inode->accounted_rstat = _inode->rstat; if (in->is_base()) { if (in->is_root()) @@ -463,12 +461,13 @@ CInode *MDCache::create_system_inode(inodeno_t ino, int mode) CInode *MDCache::create_root_inode() { - CInode *i = create_system_inode(MDS_INO_ROOT, S_IFDIR|0755); - i->inode.uid = g_conf()->mds_root_ino_uid; - i->inode.gid = g_conf()->mds_root_ino_gid; - i->inode.layout = default_file_layout; - i->inode.layout.pool_id = mds->mdsmap->get_first_data_pool(); - return i; + CInode *in = create_system_inode(MDS_INO_ROOT, S_IFDIR|0755); + auto _inode = in->_get_inode(); + _inode->uid = g_conf()->mds_root_ino_uid; + _inode->gid = g_conf()->mds_root_ino_gid; + _inode->layout = default_file_layout; + _inode->layout.pool_id = mds->mdsmap->get_first_data_pool(); + return in; } void MDCache::create_empty_hierarchy(MDSGather *gather) @@ -482,7 +481,7 @@ void MDCache::create_empty_hierarchy(MDSGather *gather) rootdir->dir_rep = CDir::REP_ALL; //NONE; ceph_assert(rootdir->fnode.accounted_fragstat == rootdir->fnode.fragstat); - ceph_assert(rootdir->fnode.fragstat == root->inode.dirstat); + ceph_assert(rootdir->fnode.fragstat == root->get_inode()->dirstat); ceph_assert(rootdir->fnode.accounted_rstat == rootdir->fnode.rstat); /* Do no update rootdir rstat information of the fragment, rstat upkeep 
magic * assume version 0 is stale/invalid. @@ -492,10 +491,7 @@ void MDCache::create_empty_hierarchy(MDSGather *gather) rootdir->mark_dirty(rootdir->pre_dirty(), mds->mdlog->get_current_segment()); rootdir->commit(0, gather->new_sub()); - root->mark_clean(); - root->mark_dirty(root->pre_dirty(), mds->mdlog->get_current_segment()); - root->mark_dirty_parent(mds->mdlog->get_current_segment(), true); - root->flush(gather->new_sub()); + root->store(gather->new_sub()); } void MDCache::create_mydir_hierarchy(MDSGather *gather) @@ -517,9 +513,9 @@ void MDCache::create_mydir_hierarchy(MDSGather *gather) CDentry *sdn = mydir->add_primary_dentry(name.str(), stray); sdn->_mark_dirty(mds->mdlog->get_current_segment()); - stray->inode.dirstat = straydir->fnode.fragstat; + stray->_get_inode()->dirstat = straydir->fnode.fragstat; - mydir->fnode.rstat.add(stray->inode.rstat); + mydir->fnode.rstat.add(stray->get_inode()->rstat); mydir->fnode.fragstat.nsubdirs++; // save them straydir->mark_complete(); @@ -532,10 +528,11 @@ void MDCache::create_mydir_hierarchy(MDSGather *gather) mydir->fnode.accounted_fragstat = mydir->fnode.fragstat; mydir->fnode.accounted_rstat = mydir->fnode.rstat; - myin->inode.dirstat = mydir->fnode.fragstat; - myin->inode.rstat = mydir->fnode.rstat; - ++myin->inode.rstat.rsubdirs; - myin->inode.accounted_rstat = myin->inode.rstat; + auto inode = myin->_get_inode(); + inode->dirstat = mydir->fnode.fragstat; + inode->rstat = mydir->fnode.rstat; + ++inode->rstat.rsubdirs; + inode->accounted_rstat = inode->rstat; mydir->mark_complete(); mydir->mark_dirty(mydir->pre_dirty(), ls); @@ -565,15 +562,16 @@ void MDCache::_create_system_file(CDir *dir, std::string_view name, CInode *in, version_t dpv = dn->pre_dirty(); CDir *mdir = 0; - if (in->inode.is_dir()) { - in->inode.rstat.rsubdirs = 1; + auto inode = in->_get_inode(); + if (in->is_dir()) { + inode->rstat.rsubdirs = 1; mdir = in->get_or_open_dirfrag(this, frag_t()); mdir->mark_complete(); mdir->pre_dirty(); } else 
- in->inode.rstat.rfiles = 1; - in->inode.version = dn->pre_dirty(); + inode->rstat.rfiles = 1; + inode->version = dn->pre_dirty(); SnapRealm *realm = dir->get_inode()->find_snaprealm(); dn->first = in->first = realm->get_newest_seq() + 1; @@ -613,10 +611,9 @@ void MDCache::_create_system_file_finish(MutationRef& mut, CDentry *dn, version_ dn->mark_dirty(dpv, mut->ls); CInode *in = dn->get_linkage()->get_inode(); - in->inode.version--; - in->mark_dirty(in->inode.version + 1, mut->ls); + in->mark_dirty(mut->ls); - if (in->inode.is_dir()) { + if (in->is_dir()) { CDir *dir = in->get_dirfrag(frag_t()); ceph_assert(dir); dir->mark_dirty(1, mut->ls); @@ -1553,10 +1550,13 @@ CInode *MDCache::cow_inode(CInode *in, snapid_t last) ceph_assert(last >= in->first); CInode *oldin = new CInode(this, true, in->first, last); - oldin->inode = *in->get_previous_projected_inode(); - oldin->xattrs = *in->get_previous_projected_xattrs(); + auto _inode = CInode::allocate_inode(*in->get_previous_projected_inode()); + _inode->trim_client_ranges(last); + oldin->reset_inode(std::move(_inode)); + auto _xattrs = in->get_previous_projected_xattrs(); + oldin->reset_xattrs(std::move(_xattrs)); + oldin->symlink = in->symlink; - oldin->inode.trim_client_ranges(last); if (in->first < in->oldest_snap) in->oldest_snap = in->first; @@ -1758,7 +1758,7 @@ void MDCache::journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, if (pcow_inode) *pcow_inode = oldin; CDentry *olddn = dn->dir->add_primary_dentry(dn->get_name(), oldin, oldfirst, follows); - oldin->inode.version = olddn->pre_dirty(); + oldin->_get_inode()->version = olddn->pre_dirty(); dout(10) << " olddn " << *olddn << dendl; bool need_snapflush = !oldin->client_snap_caps.empty(); if (need_snapflush) { @@ -1817,7 +1817,6 @@ void MDCache::project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t fi int linkunlink, SnapRealm *prealm) { CDentry *parentdn = cur->get_projected_parent_dn(); - CInode::mempool_inode *curi = 
cur->get_projected_inode(); if (cur->first > first) first = cur->first; @@ -1855,44 +1854,54 @@ void MDCache::project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t fi ceph_assert(cur->is_frozen_inode()); update = false; } - _project_rstat_inode_to_frag(*curi, std::max(first, floor), cur->last, parent, + // hacky + const CInode::mempool_inode *pi; + if (update && cur->is_projected()) { + pi = cur->_get_projected_inode(); + } else { + pi = cur->get_projected_inode().get(); + if (update) { + // new inode + ceph_assert(pi->rstat == pi->accounted_rstat); + update = false; + } + } + _project_rstat_inode_to_frag(pi, std::max(first, floor), cur->last, parent, linkunlink, update); } if (g_conf()->mds_snap_rstat) { for (const auto &p : cur->dirty_old_rstats) { - auto &old = cur->old_inodes[p]; + const auto &old = cur->get_old_inodes()->at(p); snapid_t ofirst = std::max(old.first, floor); auto it = snaps.lower_bound(ofirst); if (it == snaps.end() || *it > p) continue; if (p >= floor) - _project_rstat_inode_to_frag(old.inode, ofirst, p, parent, 0, false); + _project_rstat_inode_to_frag(&old.inode, ofirst, p, parent, 0, false); } } cur->dirty_old_rstats.clear(); } -void MDCache::_project_rstat_inode_to_frag(CInode::mempool_inode& inode, snapid_t ofirst, snapid_t last, +void MDCache::_project_rstat_inode_to_frag(const CInode::mempool_inode* inode, snapid_t ofirst, snapid_t last, CDir *parent, int linkunlink, bool update_inode) { dout(10) << "_project_rstat_inode_to_frag [" << ofirst << "," << last << "]" << dendl; - dout(20) << " inode rstat " << inode.rstat << dendl; - dout(20) << " inode accounted_rstat " << inode.accounted_rstat << dendl; + dout(20) << " inode rstat " << inode->rstat << dendl; + dout(20) << " inode accounted_rstat " << inode->accounted_rstat << dendl; nest_info_t delta; if (linkunlink == 0) { - delta.add(inode.rstat); - delta.sub(inode.accounted_rstat); + delta.add(inode->rstat); + delta.sub(inode->accounted_rstat); } else if (linkunlink < 0) { - 
delta.sub(inode.accounted_rstat); + delta.sub(inode->accounted_rstat); } else { - delta.add(inode.rstat); + delta.add(inode->rstat); } dout(20) << " delta " << delta << dendl; - if (update_inode) - inode.accounted_rstat = inode.rstat; while (last >= ofirst) { /* @@ -1985,12 +1994,15 @@ void MDCache::_project_rstat_inode_to_frag(CInode::mempool_inode& inode, snapid_ dout(20) << " project to [" << first << "," << last << "] " << *prstat << dendl; ceph_assert(last >= first); prstat->add(delta); - if (update_inode) - inode.accounted_rstat = inode.rstat; dout(20) << " result [" << first << "," << last << "] " << *prstat << " " << *parent << dendl; last = first-1; } + + if (update_inode) { + auto _inode = const_cast(inode); + _inode->accounted_rstat = _inode->rstat; + } } void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat, @@ -2004,25 +2016,31 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou delta.sub(accounted_rstat); dout(20) << " delta " << delta << dendl; + CInode::old_inode_map_ptr _old_inodes; while (last >= ofirst) { CInode::mempool_inode *pi; snapid_t first; if (last == pin->last) { - pi = pin->get_projected_inode(); + pi = pin->_get_projected_inode(); first = std::max(ofirst, pin->first); if (first > pin->first) { - auto &old = pin->cow_old_inode(first-1, cow_head); + auto& old = pin->cow_old_inode(first-1, cow_head); dout(20) << " cloned old_inode rstat is " << old.inode.rstat << dendl; } } else { + if (!_old_inodes) { + _old_inodes = CInode::allocate_old_inode_map(); + if (pin->is_any_old_inodes()) + *_old_inodes = *pin->get_old_inodes(); + } if (last >= pin->first) { first = pin->first; pin->cow_old_inode(last, cow_head); } else { // our life is easier here because old_inodes is not sparse // (although it may not begin at snapid 1) - auto it = pin->old_inodes.lower_bound(last); - if (it == pin->old_inodes.end()) { + auto it = _old_inodes->lower_bound(last); + if (it == 
_old_inodes->end()) { dout(10) << " no old_inode <= " << last << ", done." << dendl; break; } @@ -2035,7 +2053,7 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou if (it->first > last) { dout(10) << " splitting right old_inode [" << first << "," << it->first << "] to [" << (last+1) << "," << it->first << "]" << dendl; - pin->old_inodes[last] = it->second; + (*_old_inodes)[last] = it->second; it->second.first = last+1; pin->dirty_old_rstats.insert(it->first); } @@ -2043,11 +2061,11 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou if (first < ofirst) { dout(10) << " splitting left old_inode [" << first << "," << last << "] to [" << first << "," << ofirst-1 << "]" << dendl; - pin->old_inodes[ofirst-1] = pin->old_inodes[last]; + (*_old_inodes)[ofirst-1] = (*_old_inodes)[last]; pin->dirty_old_rstats.insert(ofirst-1); - pin->old_inodes[last].first = first = ofirst; + (*_old_inodes)[last].first = first = ofirst; } - pi = &pin->old_inodes[last].inode; + pi = &(*_old_inodes)[last].inode; pin->dirty_old_rstats.insert(last); } dout(20) << " projecting to [" << first << "," << last << "] " << pi->rstat << dendl; @@ -2056,6 +2074,8 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou last = first-1; } + if (_old_inodes) + pin->reset_old_inodes(std::move(_old_inodes)); } void MDCache::broadcast_quota_to_client(CInode *in, client_t exclude_ct, bool quota_change) @@ -2066,10 +2086,8 @@ void MDCache::broadcast_quota_to_client(CInode *in, client_t exclude_ct, bool qu if (!in->is_auth() || in->is_frozen()) return; - auto i = in->get_projected_inode(); - - if (!i->quota.is_enable() && - !quota_change) + const auto& pi = in->get_projected_inode(); + if (!pi->quota.is_enable() && !quota_change) return; // creaete snaprealm for quota inode (quota was set before mimic) @@ -2084,38 +2102,38 @@ void MDCache::broadcast_quota_to_client(CInode *in, client_t exclude_ct, bool qu if (exclude_ct >= 
0 && exclude_ct != p.first) goto update; - if (cap->last_rbytes == i->rstat.rbytes && - cap->last_rsize == i->rstat.rsize()) + if (cap->last_rbytes == pi->rstat.rbytes && + cap->last_rsize == pi->rstat.rsize()) continue; - if (i->quota.max_files > 0) { - if (i->rstat.rsize() >= i->quota.max_files) + if (pi->quota.max_files > 0) { + if (pi->rstat.rsize() >= pi->quota.max_files) goto update; - if ((abs(cap->last_rsize - i->quota.max_files) >> 4) < - abs(cap->last_rsize - i->rstat.rsize())) + if ((abs(cap->last_rsize - pi->quota.max_files) >> 4) < + abs(cap->last_rsize - pi->rstat.rsize())) goto update; } - if (i->quota.max_bytes > 0) { - if (i->rstat.rbytes > i->quota.max_bytes - (i->quota.max_bytes >> 3)) + if (pi->quota.max_bytes > 0) { + if (pi->rstat.rbytes > pi->quota.max_bytes - (pi->quota.max_bytes >> 3)) goto update; - if ((abs(cap->last_rbytes - i->quota.max_bytes) >> 4) < - abs(cap->last_rbytes - i->rstat.rbytes)) + if ((abs(cap->last_rbytes - pi->quota.max_bytes) >> 4) < + abs(cap->last_rbytes - pi->rstat.rbytes)) goto update; } continue; update: - cap->last_rsize = i->rstat.rsize(); - cap->last_rbytes = i->rstat.rbytes; + cap->last_rsize = pi->rstat.rsize(); + cap->last_rbytes = pi->rstat.rbytes; auto msg = make_message(); msg->ino = in->ino(); - msg->rstat = i->rstat; - msg->quota = i->quota; + msg->rstat = pi->rstat; + msg->quota = pi->quota; mds->send_message_client_counted(msg, cap->get_session()); } for (const auto &it : in->get_replicas()) { @@ -2339,32 +2357,32 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, pin->pre_cow_old_inode(); // avoid cow mayhem! 
- auto &pi = pin->project_inode(); - pi.inode.version = pin->pre_dirty(); + auto pi = pin->project_inode(); + pi.inode->version = pin->pre_dirty(); // dirstat if (do_parent_mtime || linkunlink) { dout(20) << "predirty_journal_parents add_delta " << pf->fragstat << dendl; dout(20) << "predirty_journal_parents - " << pf->accounted_fragstat << dendl; bool touched_mtime = false, touched_chattr = false; - pi.inode.dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr); + pi.inode->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr); pf->accounted_fragstat = pf->fragstat; if (touched_mtime) - pi.inode.mtime = pi.inode.ctime = pi.inode.dirstat.mtime; + pi.inode->mtime = pi.inode->ctime = pi.inode->dirstat.mtime; if (touched_chattr) - pi.inode.change_attr = pi.inode.dirstat.change_attr; - dout(20) << "predirty_journal_parents gives " << pi.inode.dirstat << " on " << *pin << dendl; + pi.inode->change_attr = pi.inode->dirstat.change_attr; + dout(20) << "predirty_journal_parents gives " << pi.inode->dirstat << " on " << *pin << dendl; if (parent->get_frag() == frag_t()) { // i.e., we are the only frag - if (pi.inode.dirstat.size() < 0) + if (pi.inode->dirstat.size() < 0) ceph_assert(!"negative dirstat size" == g_conf()->mds_verify_scatter); - if (pi.inode.dirstat.size() != pf->fragstat.size()) { + if (pi.inode->dirstat.size() != pf->fragstat.size()) { mds->clog->error() << "unmatched fragstat size on single dirfrag " - << parent->dirfrag() << ", inode has " << pi.inode.dirstat + << parent->dirfrag() << ", inode has " << pi.inode->dirstat << ", dirfrag has " << pf->fragstat; // trust the dirfrag for now - pi.inode.dirstat = pf->fragstat; + pi.inode->dirstat = pf->fragstat; ceph_assert(!"unmatched fragstat size" == g_conf()->mds_verify_scatter); } @@ -2405,13 +2423,13 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, pf->accounted_rstat = pf->rstat; if (parent->get_frag() == 
frag_t()) { // i.e., we are the only frag - if (pi.inode.rstat.rbytes != pf->rstat.rbytes) { + if (pi.inode->rstat.rbytes != pf->rstat.rbytes) { mds->clog->error() << "unmatched rstat rbytes on single dirfrag " - << parent->dirfrag() << ", inode has " << pi.inode.rstat + << parent->dirfrag() << ", inode has " << pi.inode->rstat << ", dirfrag has " << pf->rstat; // trust the dirfrag for now - pi.inode.rstat = pf->rstat; + pi.inode->rstat = pf->rstat; ceph_assert(!"unmatched rstat rbytes" == g_conf()->mds_verify_scatter); } @@ -3806,7 +3824,7 @@ bool MDCache::expire_recursive(CInode *in, expiremap &expiremap) /* Remote strays with linkage (i.e. hardlinks) should not be * expired, because they may be the target of * a rename() as the owning MDS shuts down */ - if (!tin->is_stray() && tin->inode.nlink) { + if (!tin->is_stray() && tin->get_inode()->nlink) { dout(10) << __func__ << ": stray still has linkage " << *tin << dendl; return true; } @@ -4713,8 +4731,8 @@ void MDCache::rejoin_scour_survivor_replicas(mds_rank_t from, const cref_tinode.ino = ino; + CInode *in = new CInode(this, true, 2, last); + in->_get_inode()->ino = ino; in->state_set(CInode::STATE_REJOINUNDEF); add_inode(in); rejoin_undef_inodes.insert(in); @@ -4729,8 +4747,8 @@ CDir *MDCache::rejoin_invent_dirfrag(dirfrag_t df) in = rejoin_invent_inode(df.ino, CEPH_NOSNAP); if (!in->is_dir()) { ceph_assert(in->state_test(CInode::STATE_REJOINUNDEF)); - in->inode.mode = S_IFDIR; - in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + in->_get_inode()->mode = S_IFDIR; + in->_get_inode()->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; } CDir *dir = in->get_or_open_dirfrag(this, df.frag); dir->state_set(CDir::STATE_REJOINUNDEF); @@ -5036,9 +5054,11 @@ void MDCache::handle_cache_rejoin_ack(const cref_t &ack) if (!diri) { // barebones inode; the full inode loop below will clean up. 
diri = new CInode(this, false); - diri->inode.ino = p.first.ino; - diri->inode.mode = S_IFDIR; - diri->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + auto _inode = diri->_get_inode(); + _inode->ino = p.first.ino; + _inode->mode = S_IFDIR; + _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + add_inode(diri); if (MDS_INO_MDSDIR(from) == p.first.ino) { diri->inode_auth = mds_authority_t(from, CDIR_AUTH_UNKNOWN); @@ -5098,8 +5118,8 @@ void MDCache::handle_cache_rejoin_ack(const cref_t &ack) dout(10) << " had bad linkage for " << *dn << dendl; } - // hmm, did we have the proper linkage here? - if (dnl->is_null() && !q.second.is_null()) { + // hmm, did we have the proper linkage here? + if (dnl->is_null() && !q.second.is_null()) { if (q.second.is_remote()) { dn->dir->link_remote_inode(dn, q.second.remote_ino, q.second.remote_d_type); } else { @@ -5107,9 +5127,10 @@ void MDCache::handle_cache_rejoin_ack(const cref_t &ack) if (!in) { // barebones inode; assume it's dir, the full inode loop below will clean up. 
in = new CInode(this, false, q.second.first, q.first.snapid); - in->inode.ino = q.second.ino; - in->inode.mode = S_IFDIR; - in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + auto _inode = in->_get_inode(); + _inode->ino = q.second.ino; + _inode->mode = S_IFDIR; + _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; add_inode(in); dout(10) << " add inode " << *in << dendl; } else if (in->get_parent_dn()) { @@ -5120,7 +5141,7 @@ void MDCache::handle_cache_rejoin_ack(const cref_t &ack) dn->dir->link_primary_inode(dn, in); isolated_inodes.erase(in); } - } + } dn->set_replica_nonce(q.second.nonce); dn->lock.set_state_rejoin(q.second.lock, rejoin_waiters, survivor); @@ -5628,7 +5649,7 @@ void MDCache::choose_lock_states_and_reconnect_caps() if (in->last != CEPH_NOSNAP) continue; - if (in->is_auth() && !in->is_base() && in->inode.is_dirty_rstat()) + if (in->is_auth() && !in->is_base() && in->get_inode()->is_dirty_rstat()) in->mark_dirty_rstat(); int dirty_caps = 0; @@ -6068,7 +6089,7 @@ void MDCache::opened_undef_inode(CInode *in) { rejoin_undef_inodes.erase(in); if (in->is_dir()) { // FIXME: re-hash dentries if necessary - ceph_assert(in->inode.dir_layout.dl_dir_hash == g_conf()->mds_default_dir_hash); + ceph_assert(in->get_inode()->dir_layout.dl_dir_hash == g_conf()->mds_default_dir_hash); if (in->get_num_dirfrags() && !in->dirfragtree.is_leaf(frag_t())) { CDir *dir = in->get_dirfrag(frag_t()); ceph_assert(dir); @@ -6396,14 +6417,12 @@ void MDCache::identify_files_to_recover() } bool recover = false; - for (map::iterator p = in->inode.client_ranges.begin(); - p != in->inode.client_ranges.end(); - ++p) { - Capability *cap = in->get_client_cap(p->first); + for (auto& p : in->get_inode()->client_ranges) { + Capability *cap = in->get_client_cap(p.first); if (cap) { cap->mark_clientwriteable(); } else { - dout(10) << " client." << p->first << " has range " << p->second << " but no cap on " << *in << dendl; + dout(10) << " client." 
<< p.first << " has range " << p.second << " but no cap on " << *in << dendl; recover = true; break; } @@ -6467,7 +6486,7 @@ public: void MDCache::truncate_inode(CInode *in, LogSegment *ls) { - auto pi = in->get_projected_inode(); + const auto& pi = in->get_projected_inode(); dout(10) << "truncate_inode " << pi->truncate_from << " -> " << pi->truncate_size << " on " << *in @@ -6505,7 +6524,7 @@ struct C_IO_MDC_TruncateFinish : public MDCacheIOContext { void MDCache::_truncate_inode(CInode *in, LogSegment *ls) { - auto pi = &in->inode; + const auto& pi = in->get_inode(); dout(10) << "_truncate_inode " << pi->truncate_from << " -> " << pi->truncate_size << " on " << *in << dendl; @@ -6528,7 +6547,8 @@ void MDCache::_truncate_inode(CInode *in, LogSegment *ls) ceph_assert(in->last == CEPH_NOSNAP); } dout(10) << "_truncate_inode snapc " << snapc << " on " << *in << dendl; - filer.truncate(in->inode.ino, &in->inode.layout, *snapc, + auto layout = pi->layout; + filer.truncate(in->ino(), &layout, *snapc, pi->truncate_size, pi->truncate_from-pi->truncate_size, pi->truncate_seq, ceph::real_time::min(), 0, new C_OnFinisher(new C_IO_MDC_TruncateFinish(this, in, ls), @@ -6554,10 +6574,10 @@ void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) ls->truncating_inodes.erase(p); // update - auto &pi = in->project_inode(); - pi.inode.version = in->pre_dirty(); - pi.inode.truncate_from = 0; - pi.inode.truncate_pending--; + auto pi = in->project_inode(); + pi.inode->version = in->pre_dirty(); + pi.inode->truncate_from = 0; + pi.inode->truncate_pending--; MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); @@ -9232,7 +9252,7 @@ void MDCache::handle_open_ino(const cref_t &m, int err) break; CInode *diri = pdn->get_dir()->get_inode(); reply->ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->get_name(), - in->inode.version)); + in->get_version())); in = diri; } } else { @@ -10090,7 +10110,7 @@ void MDCache::scan_stray_dir(dirfrag_t next) 
CDentry::linkage_t *dnl = dn->get_projected_linkage(); if (dnl->is_primary()) { CInode *in = dnl->get_inode(); - if (in->inode.nlink == 0) + if (in->get_inode()->nlink == 0) in->state_set(CInode::STATE_ORPHAN); maybe_eval_stray(in); } @@ -10777,9 +10797,10 @@ void MDCache::encode_replica_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl) void MDCache::encode_replica_inode(CInode *in, mds_rank_t to, bufferlist& bl, uint64_t features) { - ENCODE_START(2, 1, bl); ceph_assert(in->is_auth()); - encode(in->inode.ino, bl); // bleh, minor assymetry here + + ENCODE_START(2, 1, bl); + encode(in->ino(), bl); // bleh, minor assymetry here encode(in->last, bl); __u32 nonce = in->add_replica(to); @@ -10896,7 +10917,7 @@ void MDCache::decode_replica_inode(CInode *&in, bufferlist::const_iterator& p, C decode(nonce, p); in = get_inode(ino, last); if (!in) { - in = new CInode(this, false, 1, last); + in = new CInode(this, false, 2, last); in->set_replica_nonce(nonce); in->_decode_base(p); in->_decode_locks_state_for_replica(p, true); @@ -11957,8 +11978,8 @@ void MDCache::dispatch_fragment_dir(MDRequestRef& mdr) // dft lock if (diri->is_auth()) { // journal dirfragtree - auto &pi = diri->project_inode(); - pi.inode.version = diri->pre_dirty(); + auto pi = diri->project_inode(); + pi.inode->version = diri->pre_dirty(); journal_dirty_inode(mdr.get(), &le->metablob, diri); } else { mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock); @@ -12387,8 +12408,8 @@ void MDCache::rollback_uncommitted_fragments() } if (diri_auth) { - auto &pi = diri->project_inode(); - pi.inode.version = diri->pre_dirty(); + auto pi = diri->project_inode(); + pi.inode->version = diri->pre_dirty(); diri->pop_and_dirty_projected_inode(ls); // hacky le->metablob.add_primary_dentry(diri->get_projected_parent_dn(), diri, true); } else { @@ -13159,8 +13180,8 @@ do_rdlocks: } } - if (!dir_info.same_sums(diri->inode.dirstat) || - !nest_info.same_sums(diri->inode.rstat)) { + if 
(!dir_info.same_sums(diri->get_inode()->dirstat) || + !nest_info.same_sums(diri->get_inode()->rstat)) { dout(10) << __func__ << " failed to fix fragstat/rstat on " << *diri << dendl; } @@ -13193,9 +13214,9 @@ void MDCache::upgrade_inode_snaprealm_work(MDRequestRef& mdr) return; // project_snaprealm() upgrades snaprealm format - auto &pi = in->project_inode(false, true); + auto pi = in->project_inode(false, true); mdr->add_projected_inode(in); - pi.inode.version = in->pre_dirty(); + pi.inode->version = in->pre_dirty(); mdr->ls = mds->mdlog->get_current_segment(); EUpdate *le = new EUpdate(mds->mdlog, "upgrade_snaprealm"); @@ -13324,7 +13345,7 @@ void MDCache::register_perfcounters() * away. */ void MDCache::maybe_eval_stray(CInode *in, bool delay) { - if (in->inode.nlink > 0 || in->is_base() || is_readonly() || + if (in->get_inode()->nlink > 0 || in->is_base() || is_readonly() || mds->get_state() <= MDSMap::STATE_REJOIN) return; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 8760766f9d592..e6debb30e017c 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -413,7 +413,7 @@ class MDCache { void project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first, int linkunlink, SnapRealm *prealm); - void _project_rstat_inode_to_frag(CInode::mempool_inode & inode, snapid_t ofirst, snapid_t last, + void _project_rstat_inode_to_frag(const CInode::mempool_inode* inode, snapid_t ofirst, snapid_t last, CDir *parent, int linkunlink, bool update_inode); void project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat, snapid_t ofirst, snapid_t last, diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 1589dc8482cb6..faee202f846cd 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1626,7 +1626,7 @@ void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, dout(7) << *in << dendl; ceph_assert(!in->is_replica(mds->get_nodeid())); - encode(in->inode.ino, enc_state); + encode(in->ino(), enc_state); encode(in->last, 
enc_state); in->encode_export(enc_state); @@ -3193,7 +3193,7 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, in = cache->get_inode(ino, last); if (!in) { - in = new CInode(mds->mdcache, true, 1, last); + in = new CInode(mds->mdcache, true, 2, last); added = true; } @@ -3222,7 +3222,7 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, dout(10) << " had " << *in << dendl; } - if (in->inode.is_dirty_rstat()) + if (in->get_inode()->is_dirty_rstat()) in->mark_dirty_rstat(); // clear if dirtyscattered, since we're going to journal this diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc index 2a101a2126200..1abc6a2530a97 100644 --- a/src/mds/RecoveryQueue.cc +++ b/src/mds/RecoveryQueue.cc @@ -89,7 +89,7 @@ void RecoveryQueue::advance() void RecoveryQueue::_start(CInode *in) { - auto pi = in->get_projected_inode(); + const auto& pi = in->get_projected_inode(); // blech if (pi->client_ranges.size() && !pi->get_max_size()) { @@ -99,13 +99,14 @@ void RecoveryQueue::_start(CInode *in) auto p = file_recovering.find(in); if (pi->client_ranges.size() && pi->get_max_size()) { - dout(10) << "starting " << in->inode.size << " " << pi->client_ranges + dout(10) << "starting " << pi->size << " " << pi->client_ranges << " " << *in << dendl; if (p == file_recovering.end()) { file_recovering.insert(make_pair(in, false)); C_MDC_Recover *fin = new C_MDC_Recover(this, in); - filer.probe(in->inode.ino, &in->inode.layout, in->last, + auto layout = pi->layout; + filer.probe(in->ino(), &layout, in->last, pi->get_max_size(), &fin->size, &fin->mtime, false, 0, fin); } else { @@ -113,7 +114,7 @@ void RecoveryQueue::_start(CInode *in) dout(10) << "already working on " << *in << ", set need_restart flag" << dendl; } } else { - dout(10) << "skipping " << in->inode.size << " " << *in << dendl; + dout(10) << "skipping " << pi->size << " " << *in << dendl; if (p == file_recovering.end()) { 
in->state_clear(CInode::STATE_RECOVERING); mds->locker->eval(in, CEPH_LOCK_IFILE); diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index e73d441e3a73a..e28fc305bdce7 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -440,7 +440,7 @@ void ScrubStack::_validate_inode_done(CInode *in, int r, { // Record backtrace fails as remote linkage damage, as // we may not be able to resolve hard links to this inode - mdcache->mds->damage_table.notify_remote_damaged(in->inode.ino, path); + mdcache->mds->damage_table.notify_remote_damaged(in->ino(), path); } else if (result.inode.checked && !result.inode.passed && !result.inode.repaired) { // Record damaged inode structures as damaged dentries as diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 300de98f76643..29cd9f36782bb 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3188,9 +3188,10 @@ CDentry* Server::prepare_stray_dentry(MDRequestRef& mdr, CInode *in) * create a new inode. set c/m/atime. hit dir pop. */ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode, - file_layout_t *layout) + const file_layout_t *layout) { CInode *in = new CInode(mdcache); + auto _inode = in->_get_inode(); // Server::prepare_force_open_sessions() can re-open session in closing // state. In that corner case, session's prealloc_inos are being freed. 
@@ -3199,7 +3200,7 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino bool allow_prealloc_inos = mdr->session->is_open(); // assign ino - if (allow_prealloc_inos && (mdr->used_prealloc_ino = in->inode.ino = mdr->session->take_ino(useino))) { + if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(useino))) { mds->sessionmap.mark_projected(mdr->session); dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino << " (" << mdr->session->info.prealloc_inos @@ -3207,15 +3208,15 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino << dendl; } else { mdr->alloc_ino = - in->inode.ino = mds->inotable->project_alloc_id(useino); + _inode->ino = mds->inotable->project_alloc_id(useino); dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl; } - if (useino && useino != in->inode.ino) { - dout(0) << "WARNING: client specified " << useino << " and i allocated " << in->inode.ino << dendl; + if (useino && useino != _inode->ino) { + dout(0) << "WARNING: client specified " << useino << " and i allocated " << _inode->ino << dendl; mds->clog->error() << mdr->client_request->get_source() << " specified ino " << useino - << " but mds." << mds->get_nodeid() << " allocated " << in->inode.ino; + << " but mds." << mds->get_nodeid() << " allocated " << _inode->ino; //ceph_abort(); // just for now. } @@ -3229,64 +3230,60 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino dout(10) << "prepare_new_inode prealloc " << mdr->prealloc_inos << dendl; } - in->inode.version = 1; - in->inode.xattr_version = 1; - in->inode.nlink = 1; // FIXME + _inode->version = 1; + _inode->xattr_version = 1; + _inode->nlink = 1; // FIXME - in->inode.mode = mode; + _inode->mode = mode; // FIPS zeroization audit 20191117: this memset is not security related. 
- memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout)); - if (in->inode.is_dir()) { - in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + memset(&_inode->dir_layout, 0, sizeof(_inode->dir_layout)); + if (_inode->is_dir()) { + _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; } else if (layout) { - in->inode.layout = *layout; + _inode->layout = *layout; } else { - in->inode.layout = mdcache->default_file_layout; + _inode->layout = mdcache->default_file_layout; } - in->inode.truncate_size = -1ull; // not truncated, yet! - in->inode.truncate_seq = 1; /* starting with 1, 0 is kept for no-truncation logic */ + _inode->truncate_size = -1ull; // not truncated, yet! + _inode->truncate_seq = 1; /* starting with 1, 0 is kept for no-truncation logic */ CInode *diri = dir->get_inode(); - dout(10) << oct << " dir mode 0" << diri->inode.mode << " new mode 0" << mode << dec << dendl; + dout(10) << oct << " dir mode 0" << diri->get_inode()->mode << " new mode 0" << mode << dec << dendl; - if (diri->inode.mode & S_ISGID) { + if (diri->get_inode()->mode & S_ISGID) { dout(10) << " dir is sticky" << dendl; - in->inode.gid = diri->inode.gid; + _inode->gid = diri->get_inode()->gid; if (S_ISDIR(mode)) { dout(10) << " new dir also sticky" << dendl; - in->inode.mode |= S_ISGID; + _inode->mode |= S_ISGID; } } else - in->inode.gid = mdr->client_request->get_caller_gid(); + _inode->gid = mdr->client_request->get_caller_gid(); - in->inode.uid = mdr->client_request->get_caller_uid(); + _inode->uid = mdr->client_request->get_caller_uid(); - in->inode.btime = in->inode.ctime = in->inode.mtime = in->inode.atime = + _inode->btime = _inode->ctime = _inode->mtime = _inode->atime = mdr->get_op_stamp(); - in->inode.change_attr = 0; + _inode->change_attr = 0; const cref_t &req = mdr->client_request; if (req->get_data().length()) { auto p = req->get_data().cbegin(); // xattrs on new inode? 
- CInode::mempool_xattr_map xattrs; - decode_noshare(xattrs, p); - for (const auto &p : xattrs) { - dout(10) << "prepare_new_inode setting xattr " << p.first << dendl; - auto em = in->xattrs.emplace(std::piecewise_construct, std::forward_as_tuple(p.first), std::forward_as_tuple(p.second)); - if (!em.second) - em.first->second = p.second; - } + auto _xattrs = CInode::allocate_xattr_map(); + decode_noshare(*_xattrs, p); + dout(10) << "prepare_new_inode setting xattrs " << *_xattrs << dendl; + in->reset_xattrs(std::move(_xattrs)); } if (!mds->mdsmap->get_inline_data_enabled() || !mdr->session->get_connection()->has_feature(CEPH_FEATURE_MDS_INLINE_DATA)) - in->inode.inline_data.version = CEPH_INLINE_NONE; + _inode->inline_data.version = CEPH_INLINE_NONE; mdcache->add_inode(in); // add dout(10) << "prepare_new_inode " << *in << dendl; @@ -4084,11 +4081,11 @@ void Server::handle_client_open(MDRequestRef& mdr) return; } - if (!cur->inode.is_file()) { + if (!cur->is_file()) { // can only open non-regular inode with mode FILE_MODE_PIN, at least for now. cmode = CEPH_FILE_MODE_PIN; // the inode is symlink and client wants to follow it, ignore the O_TRUNC flag. - if (cur->inode.is_symlink() && !(flags & CEPH_O_NOFOLLOW)) + if (cur->is_symlink() && !(flags & CEPH_O_NOFOLLOW)) flags &= ~CEPH_O_TRUNC; } @@ -4103,20 +4100,20 @@ void Server::handle_client_open(MDRequestRef& mdr) respond_to_request(mdr, -ENXIO); // FIXME what error do we want? 
return; }*/ - if ((flags & CEPH_O_DIRECTORY) && !cur->inode.is_dir() && !cur->inode.is_symlink()) { + if ((flags & CEPH_O_DIRECTORY) && !cur->is_dir() && !cur->is_symlink()) { dout(7) << "specified O_DIRECTORY on non-directory " << *cur << dendl; respond_to_request(mdr, -EINVAL); return; } - if ((flags & CEPH_O_TRUNC) && !cur->inode.is_file()) { + if ((flags & CEPH_O_TRUNC) && !cur->is_file()) { dout(7) << "specified O_TRUNC on !(file|symlink) " << *cur << dendl; // we should return -EISDIR for directory, return -EINVAL for other non-regular - respond_to_request(mdr, cur->inode.is_dir() ? -EISDIR : -EINVAL); + respond_to_request(mdr, cur->is_dir() ? -EISDIR : -EINVAL); return; } - if (cur->inode.inline_data.version != CEPH_INLINE_NONE && + if (cur->get_inode()->inline_data.version != CEPH_INLINE_NONE && !mdr->session->get_connection()->has_feature(CEPH_FEATURE_MDS_INLINE_DATA)) { dout(7) << "old client cannot open inline data file " << *cur << dendl; respond_to_request(mdr, -EPERM); @@ -4160,7 +4157,7 @@ void Server::handle_client_open(MDRequestRef& mdr) return; // wait for pending truncate? - const auto pi = cur->get_projected_inode(); + const auto& pi = cur->get_projected_inode(); if (pi->is_truncating()) { dout(10) << " waiting for pending truncate from " << pi->truncate_from << " to " << pi->truncate_size << " to complete on " << *cur << dendl; @@ -4254,8 +4251,7 @@ public: dn->pop_projected_linkage(); // dirty inode, dn, dir - newi->inode.version--; // a bit hacky, see C_MDS_mknod_finish - newi->mark_dirty(newi->inode.version+1, mdr->ls); + newi->mark_dirty(mdr->ls); newi->mark_dirty_parent(mdr->ls, true); mdr->apply(); @@ -4368,35 +4364,37 @@ void Server::handle_client_openc(MDRequestRef& mdr) mds->locker->create_lock_cache(mdr, diri, &mdr->dir_layout); // create inode. 
- CInode *in = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino), - req->head.args.open.mode | S_IFREG, &layout); - ceph_assert(in); + CInode *newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino), + req->head.args.open.mode | S_IFREG, &layout); + ceph_assert(newi); // it's a file. - dn->push_projected_linkage(in); + dn->push_projected_linkage(newi); - in->inode.version = dn->pre_dirty(); + auto _inode = newi->_get_inode(); + _inode->version = dn->pre_dirty(); if (layout.pool_id != mdcache->default_file_layout.pool_id) - in->inode.add_old_pool(mdcache->default_file_layout.pool_id); - in->inode.update_backtrace(); - in->inode.rstat.rfiles = 1; + _inode->add_old_pool(mdcache->default_file_layout.pool_id); + _inode->update_backtrace(); + _inode->rstat.rfiles = 1; + _inode->accounted_rstat = _inode->rstat; SnapRealm *realm = diri->find_snaprealm(); snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq(); ceph_assert(follows >= realm->get_newest_seq()); ceph_assert(dn->first == follows+1); - in->first = dn->first; + newi->first = dn->first; // do the open - Capability *cap = mds->locker->issue_new_caps(in, cmode, mdr, realm); - in->authlock.set_state(LOCK_EXCL); - in->xattrlock.set_state(LOCK_EXCL); + Capability *cap = mds->locker->issue_new_caps(newi, cmode, mdr, realm); + newi->authlock.set_state(LOCK_EXCL); + newi->xattrlock.set_state(LOCK_EXCL); if (cap && (cmode & CEPH_FILE_MODE_WR)) { - in->inode.client_ranges[client].range.first = 0; - in->inode.client_ranges[client].range.last = in->inode.layout.stripe_unit; - in->inode.client_ranges[client].follows = follows; + _inode->client_ranges[client].range.first = 0; + _inode->client_ranges[client].range.last = _inode->layout.stripe_unit; + _inode->client_ranges[client].follows = follows; cap->mark_clientwriteable(); } @@ -4406,19 +4404,19 @@ void Server::handle_client_openc(MDRequestRef& mdr) mdlog->start_entry(le); le->metablob.add_client_req(req->get_reqid(), 
req->get_oldest_client_tid()); journal_allocated_inos(mdr, &le->metablob); - mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); - le->metablob.add_primary_dentry(dn, in, true, true, true); + mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); + le->metablob.add_primary_dentry(dn, newi, true, true, true); // make sure this inode gets into the journal - le->metablob.add_opened_ino(in->ino()); + le->metablob.add_opened_ino(newi->ino()); - C_MDS_openc_finish *fin = new C_MDS_openc_finish(this, mdr, dn, in); + C_MDS_openc_finish *fin = new C_MDS_openc_finish(this, mdr, dn, newi); if (mdr->session->info.has_feature(CEPHFS_FEATURE_DELEG_INO)) { openc_response_t ocresp; dout(10) << "adding created_ino and delegated_inos" << dendl; - ocresp.created_ino = in->inode.ino; + ocresp.created_ino = _inode->ino; if (delegate_inos_pct && !req->is_queued_for_replay()) { // Try to delegate some prealloc_inos to the client, if it's down to half the max @@ -4431,10 +4429,10 @@ void Server::handle_client_openc(MDRequestRef& mdr) } else if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) { dout(10) << "adding ino to reply to indicate inode was created" << dendl; // add the file created flag onto the reply if create_flags features is supported - encode(in->inode.ino, mdr->reply_extra_bl); + encode(newi->ino(), mdr->reply_extra_bl); } - journal_and_reply(mdr, in, dn, le, fin); + journal_and_reply(mdr, newi, dn, le, fin); // We hit_dir (via hit_inode) in our finish callback, but by then we might // have overshot the split size (multiple opencs in flight), so here is @@ -4717,7 +4715,7 @@ public: MDSRank *mds = get_mds(); // notify any clients - if (truncating_smaller && in->inode.is_truncating()) { + if (truncating_smaller && in->get_inode()->is_truncating()) { mds->locker->issue_truncate(in); mds->mdcache->truncate_inode(in, mdr->ls); } @@ 
-4923,17 +4921,17 @@ void Server::handle_client_setattr(MDRequestRef& mdr) if (!mds->locker->acquire_locks(mdr, lov)) return; - if ((mask & CEPH_SETATTR_UID) && (cur->inode.uid != req->head.args.setattr.uid)) + if ((mask & CEPH_SETATTR_UID) && (cur->get_inode()->uid != req->head.args.setattr.uid)) access_mask |= MAY_CHOWN; - if ((mask & CEPH_SETATTR_GID) && (cur->inode.gid != req->head.args.setattr.gid)) + if ((mask & CEPH_SETATTR_GID) && (cur->get_inode()->gid != req->head.args.setattr.gid)) access_mask |= MAY_CHGRP; if (!check_access(mdr, cur, access_mask)) return; // trunc from bigger -> smaller? - auto pip = cur->get_projected_inode(); + const auto& pip = cur->get_projected_inode(); uint64_t old_size = std::max(pip->size, req->head.args.setattr.old_size); @@ -4964,55 +4962,55 @@ void Server::handle_client_setattr(MDRequestRef& mdr) EUpdate *le = new EUpdate(mdlog, "setattr"); mdlog->start_entry(le); - auto &pi = cur->project_inode(); + auto pi = cur->project_inode(); if (mask & CEPH_SETATTR_UID) - pi.inode.uid = req->head.args.setattr.uid; + pi.inode->uid = req->head.args.setattr.uid; if (mask & CEPH_SETATTR_GID) - pi.inode.gid = req->head.args.setattr.gid; + pi.inode->gid = req->head.args.setattr.gid; if (mask & CEPH_SETATTR_MODE) - pi.inode.mode = (pi.inode.mode & ~07777) | (req->head.args.setattr.mode & 07777); + pi.inode->mode = (pi.inode->mode & ~07777) | (req->head.args.setattr.mode & 07777); else if ((mask & (CEPH_SETATTR_UID|CEPH_SETATTR_GID|CEPH_SETATTR_KILL_SGUID)) && - S_ISREG(pi.inode.mode) && - (pi.inode.mode & (S_IXUSR|S_IXGRP|S_IXOTH))) { - pi.inode.mode &= ~(S_ISUID|S_ISGID); + S_ISREG(pi.inode->mode) && + (pi.inode->mode & (S_IXUSR|S_IXGRP|S_IXOTH))) { + pi.inode->mode &= ~(S_ISUID|S_ISGID); } if (mask & CEPH_SETATTR_MTIME) - pi.inode.mtime = req->head.args.setattr.mtime; + pi.inode->mtime = req->head.args.setattr.mtime; if (mask & CEPH_SETATTR_ATIME) - pi.inode.atime = req->head.args.setattr.atime; + pi.inode->atime = 
req->head.args.setattr.atime; if (mask & CEPH_SETATTR_BTIME) - pi.inode.btime = req->head.args.setattr.btime; + pi.inode->btime = req->head.args.setattr.btime; if (mask & (CEPH_SETATTR_ATIME | CEPH_SETATTR_MTIME | CEPH_SETATTR_BTIME)) - pi.inode.time_warp_seq++; // maybe not a timewarp, but still a serialization point. + pi.inode->time_warp_seq++; // maybe not a timewarp, but still a serialization point. if (mask & CEPH_SETATTR_SIZE) { if (truncating_smaller) { - pi.inode.truncate(old_size, req->head.args.setattr.size); + pi.inode->truncate(old_size, req->head.args.setattr.size); le->metablob.add_truncate_start(cur->ino()); } else { - pi.inode.size = req->head.args.setattr.size; - pi.inode.rstat.rbytes = pi.inode.size; + pi.inode->size = req->head.args.setattr.size; + pi.inode->rstat.rbytes = pi.inode->size; } - pi.inode.mtime = mdr->get_op_stamp(); + pi.inode->mtime = mdr->get_op_stamp(); // adjust client's max_size? CInode::mempool_inode::client_range_map new_ranges; bool max_increased = false; - mds->locker->calc_new_client_ranges(cur, pi.inode.size, true, &new_ranges, &max_increased); - if (pi.inode.client_ranges != new_ranges) { - dout(10) << " client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl; - pi.inode.client_ranges = new_ranges; + mds->locker->calc_new_client_ranges(cur, pi.inode->size, true, &new_ranges, &max_increased); + if (pi.inode->client_ranges != new_ranges) { + dout(10) << " client_ranges " << pi.inode->client_ranges << " -> " << new_ranges << dendl; + pi.inode->client_ranges = new_ranges; changed_ranges = true; } } - pi.inode.version = cur->pre_dirty(); - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.change_attr++; + pi.inode->version = cur->pre_dirty(); + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->change_attr++; // 
log + wait le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); @@ -5045,24 +5043,24 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) mdlog->start_entry(le); // prepare - auto &pi = in->project_inode(); - pi.inode.version = in->pre_dirty(); - pi.inode.mtime = pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.change_attr++; - - uint64_t old_size = std::max(pi.inode.size, mdr->client_request->head.args.open.old_size); + auto pi = in->project_inode(); + pi.inode->version = in->pre_dirty(); + pi.inode->mtime = pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->change_attr++; + + uint64_t old_size = std::max(pi.inode->size, mdr->client_request->head.args.open.old_size); if (old_size > 0) { - pi.inode.truncate(old_size, 0); + pi.inode->truncate(old_size, 0); le->metablob.add_truncate_start(in->ino()); } bool changed_ranges = false; if (cap && (cmode & CEPH_FILE_MODE_WR)) { - pi.inode.client_ranges[client].range.first = 0; - pi.inode.client_ranges[client].range.last = pi.inode.get_layout_size_increment(); - pi.inode.client_ranges[client].follows = realm->get_newest_seq(); + pi.inode->client_ranges[client].range.first = 0; + pi.inode->client_ranges[client].range.last = pi.inode->get_layout_size_increment(); + pi.inode->client_ranges[client].follows = realm->get_newest_seq(); changed_ranges = true; cap->mark_clientwriteable(); } @@ -5161,15 +5159,15 @@ void Server::handle_client_setlayout(MDRequestRef& mdr) return; // project update - auto &pi = cur->project_inode(); - pi.inode.layout = layout; + auto pi = cur->project_inode(); + pi.inode->layout = layout; // add the old pool to the inode - pi.inode.add_old_pool(old_layout.pool_id); - pi.inode.version = cur->pre_dirty(); - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > 
pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.change_attr++; + pi.inode->add_old_pool(old_layout.pool_id); + pi.inode->version = cur->pre_dirty(); + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->change_attr++; // log + wait mdr->ls = mdlog->get_current_segment(); @@ -5242,7 +5240,7 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) return; // validate layout - const auto old_pi = cur->get_projected_inode(); + const auto& old_pi = cur->get_projected_inode(); file_layout_t layout; if (old_pi->has_layout()) layout = old_pi->layout; @@ -5289,9 +5287,9 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) if (!check_access(mdr, cur, access)) return; - auto &pi = cur->project_inode(); - pi.inode.layout = layout; - pi.inode.version = cur->pre_dirty(); + auto pi = cur->project_inode(); + pi.inode->layout = layout; + pi.inode->version = cur->pre_dirty(); // log + wait mdr->ls = mdlog->get_current_segment(); @@ -5533,10 +5531,10 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (check_layout_vxattr(mdr, rest, value, &layout) < 0) return; - auto &pi = cur->project_inode(); - pi.inode.layout = layout; + auto pi = cur->project_inode(); + pi.inode->layout = layout; mdr->no_early_reply = true; - pip = &pi.inode; + pip = pi.inode.get(); } else if (name.compare(0, 16, "ceph.file.layout") == 0) { if (!cur->is_file()) { respond_to_request(mdr, -EINVAL); @@ -5557,11 +5555,11 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!mds->locker->acquire_locks(mdr, lov)) return; - auto &pi = cur->project_inode(); - int64_t old_pool = pi.inode.layout.pool_id; - pi.inode.add_old_pool(old_pool); - pi.inode.layout = layout; - pip = &pi.inode; + auto pi = cur->project_inode(); + int64_t old_pool = pi.inode->layout.pool_id; + pi.inode->add_old_pool(old_pool); + pi.inode->layout = layout; + pip = 
pi.inode.get(); } else if (name.compare(0, 10, "ceph.quota") == 0) { if (!cur->is_dir() || cur->is_root()) { respond_to_request(mdr, -EINVAL); @@ -5583,8 +5581,8 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur, false, new_realm)) return; - auto &pi = cur->project_inode(false, new_realm); - pi.inode.quota = quota; + auto pi = cur->project_inode(false, new_realm); + pi.inode->quota = quota; if (new_realm) { SnapRealm *realm = cur->find_snaprealm(); @@ -5594,7 +5592,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) newsnap.seq = seq; } mdr->no_early_reply = true; - pip = &pi.inode; + pip = pi.inode.get(); client_t exclude_ct = mdr->get_client(); mdcache->broadcast_quota_to_client(cur, exclude_ct, true); @@ -5617,9 +5615,9 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur)) return; - auto &pi = cur->project_inode(); + auto pi = cur->project_inode(); cur->set_export_pin(rank); - pip = &pi.inode; + pip = pi.inode.get(); } else if (name == "ceph.dir.pin.random"sv) { if (!cur->is_dir() || cur->is_root()) { respond_to_request(mdr, -EINVAL); @@ -5646,9 +5644,9 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur)) return; - auto &pi = cur->project_inode(); + auto pi = cur->project_inode(); cur->setxattr_ephemeral_rand(val); - pip = &pi.inode; + pip = pi.inode.get(); } else if (name == "ceph.dir.pin.distributed"sv) { if (!cur->is_dir() || cur->is_root()) { respond_to_request(mdr, -EINVAL); @@ -5667,9 +5665,9 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur) if (!xlock_policylock(mdr, cur)) return; - auto &pi = cur->project_inode(); + auto pi = cur->project_inode(); cur->setxattr_ephemeral_dist(val); - pip = &pi.inode; + pip = pi.inode.get(); } else { dout(10) << " unknown vxattr " << name << dendl; respond_to_request(mdr, -EINVAL); @@ -5725,9 +5723,9 @@ void Server::handle_remove_vxattr(MDRequestRef& mdr, 
CInode *cur) if (!mds->locker->acquire_locks(mdr, lov)) return; - auto &pi = cur->project_inode(); - pi.inode.clear_layout(); - pi.inode.version = cur->pre_dirty(); + auto pi = cur->project_inode(); + pi.inode->clear_layout(); + pi.inode->version = cur->pre_dirty(); // log + wait mdr->ls = mdlog->get_current_segment(); @@ -5808,32 +5806,36 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) if (!check_access(mdr, cur, MAY_WRITE)) return; - auto pxattrs = cur->get_projected_xattrs(); size_t len = req->get_data().length(); size_t inc = len + name.length(); - // check xattrs kv pairs size - size_t cur_xattrs_size = 0; - for (const auto& p : *pxattrs) { - if ((flags & CEPH_XATTR_REPLACE) && (name.compare(p.first) == 0)) { - continue; + const auto& pxattrs = cur->get_projected_xattrs(); + if (pxattrs) { + // check xattrs kv pairs size + size_t cur_xattrs_size = 0; + for (const auto& p : *pxattrs) { + if ((flags & CEPH_XATTR_REPLACE) && name.compare(p.first) == 0) { + continue; + } + cur_xattrs_size += p.first.length() + p.second.length(); } - cur_xattrs_size += p.first.length() + p.second.length(); - } - if (((cur_xattrs_size + inc) > g_conf()->mds_max_xattr_pairs_size)) { - dout(10) << "xattr kv pairs size too big. cur_xattrs_size " - << cur_xattrs_size << ", inc " << inc << dendl; - respond_to_request(mdr, -ENOSPC); - return; - } + if (((cur_xattrs_size + inc) > g_conf()->mds_max_xattr_pairs_size)) { + dout(10) << "xattr kv pairs size too big. 
cur_xattrs_size " + << cur_xattrs_size << ", inc " << inc << dendl; + respond_to_request(mdr, -ENOSPC); + return; + } - if ((flags & CEPH_XATTR_CREATE) && pxattrs->count(mempool::mds_co::string(name))) { - dout(10) << "setxattr '" << name << "' XATTR_CREATE and EEXIST on " << *cur << dendl; - respond_to_request(mdr, -EEXIST); - return; + if ((flags & CEPH_XATTR_CREATE) && pxattrs->count(mempool::mds_co::string(name))) { + dout(10) << "setxattr '" << name << "' XATTR_CREATE and EEXIST on " << *cur << dendl; + respond_to_request(mdr, -EEXIST); + return; + } } - if ((flags & CEPH_XATTR_REPLACE) && !pxattrs->count(mempool::mds_co::string(name))) { + + if ((flags & CEPH_XATTR_REPLACE) && + !(pxattrs && pxattrs->count(mempool::mds_co::string(name)))) { dout(10) << "setxattr '" << name << "' XATTR_REPLACE and ENODATA on " << *cur << dendl; respond_to_request(mdr, -ENODATA); return; @@ -5842,21 +5844,20 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) dout(10) << "setxattr '" << name << "' len " << len << " on " << *cur << dendl; // project update - auto &pi = cur->project_inode(true); - pi.inode.version = cur->pre_dirty(); - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.change_attr++; - pi.inode.xattr_version++; - auto &px = *pi.xattrs; + auto pi = cur->project_inode(true); + pi.inode->version = cur->pre_dirty(); + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->change_attr++; + pi.inode->xattr_version++; if ((flags & CEPH_XATTR_REMOVE)) { - px.erase(mempool::mds_co::string(name)); + pi.xattrs->erase(mempool::mds_co::string(name)); } else { bufferptr b = buffer::create(len); if (len) req->get_data().begin().copy(len, b.c_str()); - auto em = px.emplace(std::piecewise_construct, std::forward_as_tuple(mempool::mds_co::string(name)), std::forward_as_tuple(b)); + auto 
em = pi.xattrs->emplace(std::piecewise_construct, std::forward_as_tuple(mempool::mds_co::string(name)), std::forward_as_tuple(b)); if (!em.second) em.first->second = b; } @@ -5901,8 +5902,8 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) if (!mds->locker->acquire_locks(mdr, lov)) return; - auto pxattrs = cur->get_projected_xattrs(); - if (pxattrs->count(mempool::mds_co::string(name)) == 0) { + const auto& pxattrs = cur->get_projected_xattrs(); + if (pxattrs && pxattrs->count(mempool::mds_co::string(name)) == 0) { dout(10) << "removexattr '" << name << "' and ENODATA on " << *cur << dendl; respond_to_request(mdr, -ENODATA); return; @@ -5911,14 +5912,14 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) dout(10) << "removexattr '" << name << "' on " << *cur << dendl; // project update - auto &pi = cur->project_inode(true); + auto pi = cur->project_inode(true); auto &px = *pi.xattrs; - pi.inode.version = cur->pre_dirty(); - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.change_attr++; - pi.inode.xattr_version++; + pi.inode->version = cur->pre_dirty(); + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->change_attr++; + pi.inode->xattr_version++; px.erase(mempool::mds_co::string(name)); // log + wait @@ -5956,12 +5957,11 @@ public: // be a bit hacky with the inode version, here.. we decrement it // just to keep mark_dirty() happen. (we didn't bother projecting // a new version of hte inode since it's just been created) - newi->inode.version--; - newi->mark_dirty(newi->inode.version + 1, mdr->ls); + newi->mark_dirty(mdr->ls); newi->mark_dirty_parent(mdr->ls, true); // mkdir? 
- if (newi->inode.is_dir()) { + if (newi->is_dir()) { CDir *dir = newi->get_dirfrag(frag_t()); ceph_assert(dir); dir->fnode.version--; @@ -5974,9 +5974,9 @@ public: MDRequestRef null_ref; get_mds()->mdcache->send_dentry_link(dn, null_ref); - if (newi->inode.is_file()) { + if (newi->is_file()) { get_mds()->locker->share_inode_max_size(newi); - } else if (newi->inode.is_dir()) { + } else if (newi->is_dir()) { // We do this now so that the linkages on the new directory are stable. newi->maybe_ephemeral_dist(); newi->maybe_ephemeral_rand(true); @@ -6024,12 +6024,14 @@ void Server::handle_client_mknod(MDRequestRef& mdr) dn->push_projected_linkage(newi); - newi->inode.rdev = req->head.args.mknod.rdev; - newi->inode.version = dn->pre_dirty(); - newi->inode.rstat.rfiles = 1; + auto _inode = newi->_get_inode(); + _inode->version = dn->pre_dirty(); + _inode->rdev = req->head.args.mknod.rdev; + _inode->rstat.rfiles = 1; + _inode->accounted_rstat = _inode->rstat; if (layout.pool_id != mdcache->default_file_layout.pool_id) - newi->inode.add_old_pool(mdcache->default_file_layout.pool_id); - newi->inode.update_backtrace(); + _inode->add_old_pool(mdcache->default_file_layout.pool_id); + _inode->update_backtrace(); snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq(); SnapRealm *realm = dn->get_dir()->inode->find_snaprealm(); @@ -6037,7 +6039,7 @@ void Server::handle_client_mknod(MDRequestRef& mdr) // if the client created a _regular_ file via MKNOD, it's highly likely they'll // want to write to it (e.g., if they are reexporting NFS) - if (S_ISREG(newi->inode.mode)) { + if (S_ISREG(_inode->mode)) { // issue a cap on the file int cmode = CEPH_FILE_MODE_RDWR; Capability *cap = mds->locker->issue_new_caps(newi, cmode, mdr, realm); @@ -6050,9 +6052,9 @@ void Server::handle_client_mknod(MDRequestRef& mdr) newi->xattrlock.set_state(LOCK_EXCL); dout(15) << " setting a client_range too, since this is a regular file" << dendl; - newi->inode.client_ranges[client].range.first 
= 0; - newi->inode.client_ranges[client].range.last = newi->inode.layout.stripe_unit; - newi->inode.client_ranges[client].follows = follows; + _inode->client_ranges[client].range.first = 0; + _inode->client_ranges[client].range.last = _inode->layout.stripe_unit; + _inode->client_ranges[client].follows = follows; cap->mark_clientwriteable(); } } @@ -6060,7 +6062,7 @@ void Server::handle_client_mknod(MDRequestRef& mdr) ceph_assert(dn->first == follows + 1); newi->first = dn->first; - dout(10) << "mknod mode " << newi->inode.mode << " rdev " << newi->inode.rdev << dendl; + dout(10) << "mknod mode " << _inode->mode << " rdev " << _inode->rdev << dendl; // prepare finisher mdr->ls = mdlog->get_current_segment(); @@ -6110,9 +6112,11 @@ void Server::handle_client_mkdir(MDRequestRef& mdr) // it's a directory. dn->push_projected_linkage(newi); - newi->inode.version = dn->pre_dirty(); - newi->inode.rstat.rsubdirs = 1; - newi->inode.update_backtrace(); + auto _inode = newi->_get_inode(); + _inode->version = dn->pre_dirty(); + _inode->rstat.rsubdirs = 1; + _inode->accounted_rstat = _inode->rstat; + _inode->update_backtrace(); snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq(); SnapRealm *realm = dn->get_dir()->inode->find_snaprealm(); @@ -6189,11 +6193,13 @@ void Server::handle_client_symlink(MDRequestRef& mdr) dn->push_projected_linkage(newi); newi->symlink = req->get_path2(); - newi->inode.size = newi->symlink.length(); - newi->inode.rstat.rbytes = newi->inode.size; - newi->inode.rstat.rfiles = 1; - newi->inode.version = dn->pre_dirty(); - newi->inode.update_backtrace(); + auto _inode = newi->_get_inode(); + _inode->version = dn->pre_dirty(); + _inode->size = newi->symlink.length(); + _inode->rstat.rbytes = _inode->size; + _inode->rstat.rfiles = 1; + _inode->accounted_rstat = _inode->rstat; + _inode->update_backtrace(); newi->first = dn->first; @@ -6346,13 +6352,13 @@ void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti) version_t tipv = 
targeti->pre_dirty(); // project inode update - auto &pi = targeti->project_inode(); - pi.inode.nlink++; - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.change_attr++; - pi.inode.version = tipv; + auto pi = targeti->project_inode(); + pi.inode->nlink++; + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->change_attr++; + pi.inode->version = tipv; bool adjust_realm = false; if (!targeti->is_projected_snaprealm_global()) { @@ -6603,7 +6609,7 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) EPeerUpdate::OP_PREPARE, EPeerUpdate::LINK); mdlog->start_entry(le); - auto &pi = dnl->get_inode()->project_inode(); + auto pi = dnl->get_inode()->project_inode(); // update journaled target inode bool inc; @@ -6611,7 +6617,7 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) bool realm_projected = false; if (mdr->peer_request->get_op() == MMDSPeerRequest::OP_LINKPREP) { inc = true; - pi.inode.nlink++; + pi.inode->nlink++; if (!targeti->is_projected_snaprealm_global()) { sr_t *newsnap = targeti->project_snaprealm(); targeti->mark_snaprealm_global(newsnap); @@ -6621,7 +6627,7 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) } } else { inc = false; - pi.inode.nlink--; + pi.inode->nlink--; if (targeti->is_projected_snaprealm_global()) { ceph_assert(mdr->peer_request->desti_snapbl.length()); auto p = mdr->peer_request->desti_snapbl.cbegin(); @@ -6629,7 +6635,7 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) sr_t *newsnap = targeti->project_snaprealm(); decode(*newsnap, p); - if (pi.inode.nlink == 0) + if (pi.inode->nlink == 0) ceph_assert(!newsnap->is_parent_global()); realm_projected = true; @@ -6641,7 +6647,7 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) link_rollback rollback; rollback.reqid = mdr->reqid; rollback.ino = targeti->ino(); - 
rollback.old_ctime = targeti->inode.ctime; // we hold versionlock xlock; no concorrent projections + rollback.old_ctime = targeti->get_inode()->ctime; // we hold versionlock xlock; no concorrent projections const fnode_t *pf = targeti->get_parent_dn()->get_dir()->get_projected_fnode(); rollback.old_dir_mtime = pf->fragstat.mtime; rollback.old_dir_rctime = pf->rstat.rctime; @@ -6657,10 +6663,10 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr) encode(rollback, le->rollback); mdr->more()->rollback_bl = le->rollback; - pi.inode.ctime = mdr->get_op_stamp(); - pi.inode.version = targeti->pre_dirty(); + pi.inode->ctime = mdr->get_op_stamp(); + pi.inode->version = targeti->pre_dirty(); - dout(10) << " projected inode " << pi.inode.ino << " v " << pi.inode.version << dendl; + dout(10) << " projected inode " << pi.inode->ino << " v " << pi.inode->version << dendl; // commit case mdcache->predirty_journal_parents(mdr, &le->commit, dnl->get_inode(), 0, PREDIRTY_SHALLOW|PREDIRTY_PRIMARY); @@ -6789,8 +6795,8 @@ void Server::do_link_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& dout(10) << " target is " << *in << dendl; ceph_assert(!in->is_projected()); // live peer request hold versionlock xlock. 
- auto &pi = in->project_inode(); - pi.inode.version = in->pre_dirty(); + auto pi = in->project_inode(); + pi.inode->version = in->pre_dirty(); mut->add_projected_inode(in); // parent dir rctime @@ -6798,20 +6804,20 @@ void Server::do_link_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& fnode_t *pf = parent->project_fnode(); mut->add_projected_fnode(parent); pf->version = parent->pre_dirty(); - if (pf->fragstat.mtime == pi.inode.ctime) { + if (pf->fragstat.mtime == pi.inode->ctime) { pf->fragstat.mtime = rollback.old_dir_mtime; - if (pf->rstat.rctime == pi.inode.ctime) + if (pf->rstat.rctime == pi.inode->ctime) pf->rstat.rctime = rollback.old_dir_rctime; mut->add_updated_lock(&parent->get_inode()->filelock); mut->add_updated_lock(&parent->get_inode()->nestlock); } // inode - pi.inode.ctime = rollback.old_ctime; + pi.inode->ctime = rollback.old_ctime; if (rollback.was_inc) - pi.inode.nlink--; + pi.inode->nlink--; else - pi.inode.nlink++; + pi.inode->nlink++; map> splits; if (rollback.snapbl.length() && in->snaprealm) { @@ -7096,19 +7102,19 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn) // the unlinked dentry dn->pre_dirty(); - auto &pi = in->project_inode(); + auto pi = in->project_inode(); { std::string t; dn->make_path_string(t, true); - pi.inode.stray_prior_path = std::move(t); - } - pi.inode.version = in->pre_dirty(); - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.change_attr++; - pi.inode.nlink--; - if (pi.inode.nlink == 0) + pi.inode->stray_prior_path = std::move(t); + } + pi.inode->version = in->pre_dirty(); + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->change_attr++; + pi.inode->nlink--; + if (pi.inode->nlink == 0) in->state_set(CInode::STATE_ORPHAN); if (mdr->more()->desti_srnode) { @@ -7124,7 +7130,7 @@ void 
Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn) mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, -1); mdcache->predirty_journal_parents(mdr, &le->metablob, in, straydn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); - pi.inode.update_backtrace(); + pi.inode->update_backtrace(); le->metablob.add_primary_dentry(straydn, in, true, true); } else { mdr->add_projected_inode(in); @@ -8282,18 +8288,18 @@ void Server::_rename_prepare(MDRequestRef& mdr, ceph_assert(straydn); // moving to straydn. // link--, and move. if (destdn->is_auth()) { - auto &pi= oldin->project_inode(); //project_snaprealm - pi.inode.version = straydn->pre_dirty(pi.inode.version); - pi.inode.update_backtrace(); - tpi = &pi.inode; + auto pi= oldin->project_inode(); //project_snaprealm + pi.inode->version = straydn->pre_dirty(pi.inode->version); + pi.inode->update_backtrace(); + tpi = pi.inode.get(); } straydn->push_projected_linkage(oldin); } else if (destdnl->is_remote()) { // nlink-- targeti if (oldin->is_auth()) { - auto &pi = oldin->project_inode(); - pi.inode.version = oldin->pre_dirty(); - tpi = &pi.inode; + auto pi = oldin->project_inode(); + pi.inode->version = oldin->pre_dirty(); + tpi = pi.inode.get(); } } } @@ -8307,16 +8313,16 @@ void Server::_rename_prepare(MDRequestRef& mdr, destdn->push_projected_linkage(srcdnl->get_remote_ino(), srcdnl->get_remote_d_type()); // srci if (srci->is_auth()) { - auto &pi = srci->project_inode(); - pi.inode.version = srci->pre_dirty(); - spi = &pi.inode; + auto pi = srci->project_inode(); + pi.inode->version = srci->pre_dirty(); + spi = pi.inode.get(); } } else { dout(10) << " will merge remote onto primary link" << dendl; if (destdn->is_auth()) { - auto &pi = oldin->project_inode(); - pi.inode.version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldin->inode.version); - spi = &pi.inode; + auto pi = oldin->project_inode(); + pi.inode->version = mdr->more()->pvmap[destdn] = 
destdn->pre_dirty(oldin->get_version()); + spi = pi.inode.get(); } } } else { // primary @@ -8338,11 +8344,11 @@ void Server::_rename_prepare(MDRequestRef& mdr, dout(10) << " noting renamed dir open frags " << metablob->renamed_dir_frags << dendl; } } - auto &pi = srci->project_inode(); // project snaprealm if srcdnl->is_primary + auto pi = srci->project_inode(); // project snaprealm if srcdnl->is_primary // & srcdnl->snaprealm - pi.inode.version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv); - pi.inode.update_backtrace(); - spi = &pi.inode; + pi.inode->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv); + pi.inode->update_backtrace(); + spi = pi.inode.get(); } destdn->push_projected_linkage(srci); } @@ -9065,7 +9071,7 @@ void Server::_logged_peer_rename(MDRequestRef& mdr, encode(exported_client_map, reply->inode_export, mds->mdsmap->get_up_features()); encode(exported_client_metadata_map, reply->inode_export); reply->inode_export.claim_append(inodebl); - reply->inode_export_v = srcdnl->get_inode()->inode.version; + reply->inode_export_v = srcdnl->get_inode()->get_version(); } // remove mdr auth pin @@ -9364,13 +9370,13 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef if (in) { bool projected; if (in->get_projected_parent_dn()->authority().first == whoami) { - auto &pi = in->project_inode(); - pip = &pi.inode; + auto pi = in->project_inode(); + pip = pi.inode.get(); mut->add_projected_inode(in); pip->version = in->pre_dirty(); projected = true; } else { - pip = in->get_projected_inode(); + // FIXME: pip = in->get_projected_inode(); projected = false; } if (pip->ctime == rollback.ctime) @@ -9433,13 +9439,13 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef bool projected; CInode::mempool_inode *ti = nullptr; if (target->get_projected_parent_dn()->authority().first == whoami) { - auto &pi = target->project_inode(); - ti = &pi.inode; + auto pi = target->project_inode(); + ti = 
pi.inode.get(); mut->add_projected_inode(target); ti->version = target->pre_dirty(); projected = true; } else { - ti = target->get_projected_inode(); + //FIXME: ti = target->get_projected_inode(); projected = false; } if (ti->ctime == rollback.ctime) @@ -9935,12 +9941,12 @@ void Server::handle_client_mksnap(MDRequestRef& mdr) info.name = snapname; info.stamp = mdr->get_op_stamp(); - auto &pi = diri->project_inode(false, true); - pi.inode.ctime = info.stamp; - if (info.stamp > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = info.stamp; - pi.inode.rstat.rsnaps++; - pi.inode.version = diri->pre_dirty(); + auto pi = diri->project_inode(false, true); + pi.inode->ctime = info.stamp; + if (info.stamp > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = info.stamp; + pi.inode->rstat.rsnaps++; + pi.inode->version = diri->pre_dirty(); // project the snaprealm auto &newsnap = *pi.snapnode; @@ -10073,12 +10079,12 @@ void Server::handle_client_rmsnap(MDRequestRef& mdr) ceph_assert(mds->snapclient->get_cached_version() >= stid); // journal - auto &pi = diri->project_inode(false, true); - pi.inode.version = diri->pre_dirty(); - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.rstat.rsnaps--; + auto pi = diri->project_inode(false, true); + pi.inode->version = diri->pre_dirty(); + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->rstat.rsnaps--; mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "rmsnap"); @@ -10217,11 +10223,11 @@ void Server::handle_client_renamesnap(MDRequestRef& mdr) ceph_assert(mds->snapclient->get_cached_version() >= stid); // journal - auto &pi = diri->project_inode(false, true); - pi.inode.ctime = mdr->get_op_stamp(); - if (mdr->get_op_stamp() > pi.inode.rstat.rctime) - pi.inode.rstat.rctime = mdr->get_op_stamp(); - pi.inode.version = 
diri->pre_dirty(); + auto pi = diri->project_inode(false, true); + pi.inode->ctime = mdr->get_op_stamp(); + if (mdr->get_op_stamp() > pi.inode->rstat.rctime) + pi.inode->rstat.rctime = mdr->get_op_stamp(); + pi.inode->version = diri->pre_dirty(); // project the snaprealm auto &newsnap = *pi.snapnode; diff --git a/src/mds/Server.h b/src/mds/Server.h index 30cbd19d8a4d5..b718513c0416e 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -177,7 +177,7 @@ public: bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid); CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in); CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode, - file_layout_t *layout=NULL); + const file_layout_t *layout=nullptr); void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob); void apply_allocated_inos(MDRequestRef& mdr, Session *session); diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index 317e04ef1d252..2049d16f5cafe 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -997,15 +997,16 @@ int Session::check_access(CInode *in, unsigned mask, if (path.length()) path = path.substr(1); // drop leading / - if (in->inode.is_dir() && - in->inode.has_layout() && - in->inode.layout.pool_ns.length() && + const auto& inode = in->get_inode(); + if (in->is_dir() && + inode->has_layout() && + inode->layout.pool_ns.length() && !connection->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2)) { dout(10) << __func__ << " client doesn't support FS_FILE_LAYOUT_V2" << dendl; return -EIO; } - if (!auth_caps.is_capable(path, in->inode.uid, in->inode.gid, in->inode.mode, + if (!auth_caps.is_capable(path, inode->uid, inode->gid, inode->mode, caller_uid, caller_gid, caller_gid_list, mask, new_uid, new_gid, info.inst.addr)) { diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index aab044ca68b71..7aace501831b8 100644 --- a/src/mds/StrayManager.cc +++ 
b/src/mds/StrayManager.cc @@ -101,7 +101,7 @@ void StrayManager::purge(CDentry *dn) SnapContext nullsnapc; PurgeItem item; - item.ino = in->inode.ino; + item.ino = in->ino(); item.stamp = ceph_clock_now(); if (in->is_dir()) { item.action = PurgeItem::PURGE_DIR; @@ -120,18 +120,17 @@ void StrayManager::purge(CDentry *dn) ceph_assert(in->last == CEPH_NOSNAP); } + const auto& pi = in->get_projected_inode(); + uint64_t to = 0; if (in->is_file()) { - to = in->inode.get_max_size(); - to = std::max(in->inode.size, to); + to = std::max(pi->size, pi->get_max_size()); // when truncating a file, the filer does not delete stripe objects that are // truncated to zero. so we need to purge stripe objects up to the max size // the file has ever been. - to = std::max(in->inode.max_size_ever, to); + to = std::max(pi->max_size_ever, to); } - auto pi = in->get_projected_inode(); - item.size = to; item.layout = pi->layout; item.old_pools.clear(); @@ -182,13 +181,13 @@ void StrayManager::_purge_stray_purged( EUpdate *le = new EUpdate(mds->mdlog, "purge_stray truncate"); mds->mdlog->start_entry(le); - auto &pi = in->project_inode(); - pi.inode.size = 0; - pi.inode.max_size_ever = 0; - pi.inode.client_ranges.clear(); - pi.inode.truncate_size = 0; - pi.inode.truncate_from = 0; - pi.inode.version = in->pre_dirty(); + auto pi = in->project_inode(); + pi.inode->size = 0; + pi.inode->max_size_ever = 0; + pi.inode->client_ranges.clear(); + pi.inode->truncate_size = 0; + pi.inode->truncate_from = 0; + pi.inode->version = in->pre_dirty(); le->metablob.add_dir_context(dn->dir); le->metablob.add_primary_dentry(dn, in, true); @@ -224,7 +223,7 @@ void StrayManager::_purge_stray_purged( pf->fragstat.nsubdirs--; else pf->fragstat.nfiles--; - pf->rstat.sub(in->inode.accounted_rstat); + pf->rstat.sub(in->get_inode()->accounted_rstat); le->metablob.add_dir_context(dn->dir); EMetaBlob::dirlump& dl = le->metablob.add_dir(dn->dir, true); @@ -460,7 +459,7 @@ bool StrayManager::_eval_stray(CDentry *dn) } // 
purge? - if (in->inode.nlink == 0) { + if (in->get_inode()->nlink == 0) { // past snaprealm parents imply snapped dentry remote links. // only important for directories. normal file data snaps are handled // by the object store. @@ -519,7 +518,7 @@ bool StrayManager::_eval_stray(CDentry *dn) } // don't purge multiversion inode with snap data if (in->snaprealm && in->snaprealm->has_past_parents() && - !in->old_inodes.empty()) { + in->is_any_old_inodes()) { // A file with snapshots: we will truncate the HEAD revision // but leave the metadata intact. ceph_assert(!in->is_dir()); @@ -618,7 +617,7 @@ void StrayManager::_eval_stray_remote(CDentry *stray_dn, CDentry *remote_dn) CDentry::linkage_t *stray_dnl = stray_dn->get_projected_linkage(); ceph_assert(stray_dnl->is_primary()); CInode *stray_in = stray_dnl->get_inode(); - ceph_assert(stray_in->inode.nlink >= 1); + ceph_assert(stray_in->get_inode()->nlink >= 1); ceph_assert(stray_in->last == CEPH_NOSNAP); /* If no remote_dn hinted, pick one arbitrarily */ @@ -723,19 +722,18 @@ void StrayManager::truncate(CDentry *dn) dout(10) << " realm " << *realm << dendl; const SnapContext *snapc = &realm->get_snap_context(); - uint64_t to = in->inode.get_max_size(); - to = std::max(in->inode.size, to); + uint64_t to = std::max(in->get_inode()->size, in->get_inode()->get_max_size()); // when truncating a file, the filer does not delete stripe objects that are // truncated to zero. so we need to purge stripe objects up to the max size // the file has ever been. 
- to = std::max(in->inode.max_size_ever, to); + to = std::max(in->get_inode()->max_size_ever, to); ceph_assert(to > 0); PurgeItem item; item.action = PurgeItem::TRUNCATE_FILE; - item.ino = in->inode.ino; - item.layout = in->inode.layout; + item.ino = in->ino(); + item.layout = in->get_inode()->layout; item.snapc = *snapc; item.size = to; item.stamp = ceph_clock_now(); diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 29bff82128e9b..d5917eb9b9b63 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -69,29 +69,27 @@ public: std::string dn; // dentry snapid_t dnfirst, dnlast; version_t dnv{0}; - CInode::mempool_inode inode; // if it's not XXX should not be part of mempool; wait for std::pmr to simplify + CInode::inode_const_ptr inode; // if it's not XXX should not be part of mempool; wait for std::pmr to simplify + CInode::xattr_map_const_ptr xattrs; fragtree_t dirfragtree; - CInode::mempool_xattr_map xattrs; std::string symlink; snapid_t oldest_snap; bufferlist snapbl; __u8 state{0}; - CInode::mempool_old_inode_map old_inodes; // XXX should not be part of mempool; wait for std::pmr to simplify + CInode::old_inode_map_const_ptr old_inodes; // XXX should not be part of mempool; wait for std::pmr to simplify fullbit(std::string_view d, snapid_t df, snapid_t dl, - version_t v, const CInode::mempool_inode& i, const fragtree_t &dft, - const CInode::mempool_xattr_map &xa, std::string_view sym, + version_t v, const CInode::inode_const_ptr& i, const fragtree_t &dft, + const CInode::xattr_map_const_ptr& xa, std::string_view sym, snapid_t os, const bufferlist &sbl, __u8 st, - const CInode::mempool_old_inode_map *oi = NULL) : + const CInode::old_inode_map_const_ptr& oi) : dn(d), dnfirst(df), dnlast(dl), dnv(v), inode(i), xattrs(xa), - oldest_snap(os), state(st) + oldest_snap(os), state(st), old_inodes(oi) { - if (i.is_symlink()) + if (i->is_symlink()) symlink = sym; - if (i.is_dir()) + if (i->is_dir()) dirfragtree = dft; - if (oi) 
- old_inodes = *oi; snapbl = sbl; } explicit fullbit(bufferlist::const_iterator &p) { @@ -116,7 +114,7 @@ public: void print(ostream& out) const { out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv - << " inode " << inode.ino + << " inode " << inode->ino << " state=" << state << std::endl; } string state_string() const { @@ -456,7 +454,7 @@ private: in->last_journaled = event_seq; //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; - const auto pi = in->get_projected_inode(); + const auto& pi = in->get_projected_inode(); if ((state & fullbit::STATE_DIRTY) && pi->is_backtrace_updated()) state |= fullbit::STATE_DIRTYPARENT; @@ -467,8 +465,8 @@ private: lump.nfull++; lump.add_dfull(dn->get_name(), dn->first, dn->last, dn->get_projected_version(), - *pi, in->dirfragtree, *in->get_projected_xattrs(), in->symlink, - in->oldest_snap, snapbl, state, &in->old_inodes); + pi, in->dirfragtree, in->get_projected_xattrs(), in->symlink, + in->oldest_snap, snapbl, state, in->get_old_inodes()); } // convenience: primary or remote? figure it out. @@ -503,9 +501,9 @@ private: in->last_journaled = event_seq; //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; - const auto& pi = *(in->get_projected_inode()); + const auto& pi = in->get_projected_inode(); + const auto& px = in->get_projected_xattrs(); const auto& pdft = in->dirfragtree; - const auto& px = *(in->get_projected_xattrs()); bufferlist snapbl; const sr_t *sr = in->get_projected_srnode(); @@ -513,7 +511,7 @@ private: sr->encode(snapbl); for (auto p = roots.begin(); p != roots.end(); ++p) { - if (p->inode.ino == in->ino()) { + if (p->inode->ino == in->ino()) { roots.erase(p); break; } @@ -522,7 +520,7 @@ private: string empty; roots.emplace_back(empty, in->first, in->last, 0, pi, pdft, px, in->symlink, in->oldest_snap, snapbl, (dirty ? 
fullbit::STATE_DIRTY : 0), - &in->old_inodes); + in->get_old_inodes()); } dirlump& add_dir(CDir *dir, bool dirty, bool complete=false) { diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 1b18d9b044e9b..32d7f04367d26 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -398,22 +398,26 @@ void EMetaBlob::fullbit::encode(bufferlist& bl, uint64_t features) const { encode(dnfirst, bl); encode(dnlast, bl); encode(dnv, bl); - encode(inode, bl, features); - encode(xattrs, bl); - if (inode.is_symlink()) + encode(*inode, bl, features); + if (xattrs) + encode(*xattrs, bl); + else + encode((__u32)0, bl); + + if (inode->is_symlink()) encode(symlink, bl); - if (inode.is_dir()) { + if (inode->is_dir()) { encode(dirfragtree, bl); encode(snapbl, bl); } encode(state, bl); - if (old_inodes.empty()) { + if (!old_inodes || old_inodes->empty()) { encode(false, bl); } else { encode(true, bl); - encode(old_inodes, bl, features); + encode(*old_inodes, bl, features); } - if (!inode.is_dir()) + if (!inode->is_dir()) encode(snapbl, bl); encode(oldest_snap, bl); ENCODE_FINISH(bl); @@ -425,11 +429,20 @@ void EMetaBlob::fullbit::decode(bufferlist::const_iterator &bl) { decode(dnfirst, bl); decode(dnlast, bl); decode(dnv, bl); - decode(inode, bl); - decode_noshare(xattrs, bl); - if (inode.is_symlink()) + { + auto _inode = CInode::allocate_inode(); + decode(*_inode, bl); + inode = std::move(_inode); + } + { + CInode::mempool_xattr_map tmp; + decode_noshare(tmp, bl); + if (!tmp.empty()) + xattrs = CInode::allocate_xattr_map(std::move(tmp)); + } + if (inode->is_symlink()) decode(symlink, bl); - if (inode.is_dir()) { + if (inode->is_dir()) { decode(dirfragtree, bl); decode(snapbl, bl); } @@ -437,9 +450,11 @@ void EMetaBlob::fullbit::decode(bufferlist::const_iterator &bl) { bool old_inodes_present; decode(old_inodes_present, bl); if (old_inodes_present) { - decode(old_inodes, bl); + auto _old_inodes = CInode::allocate_old_inode_map(); + decode(*_old_inodes, bl); + old_inodes = 
std::move(_old_inodes); } - if (!inode.is_dir()) { + if (!inode->is_dir()) { decode(snapbl, bl); } decode(oldest_snap, bl); @@ -453,21 +468,23 @@ void EMetaBlob::fullbit::dump(Formatter *f) const f->dump_stream("snapid.last") << dnlast; f->dump_int("dentry version", dnv); f->open_object_section("inode"); - inode.dump(f); + inode->dump(f); f->close_section(); // inode f->open_object_section("xattrs"); - for (const auto &p : xattrs) { - std::string s(p.second.c_str(), p.second.length()); - f->dump_string(p.first.c_str(), s); + if (xattrs) { + for (const auto &p : *xattrs) { + std::string s(p.second.c_str(), p.second.length()); + f->dump_string(p.first.c_str(), s); + } } f->close_section(); // xattrs - if (inode.is_symlink()) { + if (inode->is_symlink()) { f->dump_string("symlink", symlink); } - if (inode.is_dir()) { + if (inode->is_dir()) { f->dump_stream("frag tree") << dirfragtree; f->dump_string("has_snapbl", snapbl.length() ? "true" : "false"); - if (inode.has_layout()) { + if (inode->has_layout()) { f->open_object_section("file layout policy"); // FIXME f->dump_string("layout", "the layout exists"); @@ -475,9 +492,9 @@ void EMetaBlob::fullbit::dump(Formatter *f) const } } f->dump_string("state", state_string()); - if (!old_inodes.empty()) { + if (old_inodes && !old_inodes->empty()) { f->open_array_section("old inodes"); - for (const auto &p : old_inodes) { + for (const auto &p : *old_inodes) { f->open_object_section("inode"); f->dump_int("snapid", p.first); p.second.dump(f); @@ -489,21 +506,21 @@ void EMetaBlob::fullbit::dump(Formatter *f) const void EMetaBlob::fullbit::generate_test_instances(std::list& ls) { - CInode::mempool_inode inode; + auto _inode = CInode::allocate_inode(); fragtree_t fragtree; - CInode::mempool_xattr_map empty_xattrs; + auto _xattrs = CInode::allocate_xattr_map(); bufferlist empty_snapbl; fullbit *sample = new fullbit("/testdn", 0, 0, 0, - inode, fragtree, empty_xattrs, "", 0, empty_snapbl, + _inode, fragtree, _xattrs, "", 0, 
empty_snapbl, false, NULL); ls.push_back(sample); } void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in) { - in->inode = inode; - in->xattrs = xattrs; - if (in->inode.is_dir()) { + in->reset_inode(std::move(inode)); + in->reset_xattrs(std::move(xattrs)); + if (in->is_dir()) { if (is_export_ephemeral_random()) { dout(15) << "random ephemeral pin on " << *in << dendl; in->set_ephemeral_rand(true); @@ -514,7 +531,7 @@ void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in) if (!(in->dirfragtree == dirfragtree)) { dout(10) << "EMetaBlob::fullbit::update_inode dft " << in->dirfragtree << " -> " << dirfragtree << " on " << *in << dendl; - in->dirfragtree = dirfragtree; + in->dirfragtree = std::move(dirfragtree); in->force_dirfrags(); if (in->get_num_dirfrags() && in->authority() == CDIR_AUTH_UNDEF) { auto&& ls = in->get_nested_dirfrags(); @@ -527,12 +544,12 @@ void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in) } } } - } else if (in->inode.is_symlink()) { + } else if (in->is_symlink()) { in->symlink = symlink; } - in->old_inodes = old_inodes; - if (!in->old_inodes.empty()) { - snapid_t min_first = in->old_inodes.rbegin()->first + 1; + in->reset_old_inodes(std::move(old_inodes)); + if (in->is_any_old_inodes()) { + snapid_t min_first = in->get_old_inodes()->rbegin()->first + 1; if (min_first > in->first) in->first = min_first; } @@ -552,9 +569,10 @@ void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in) */ if (in->is_file()) { // Files must have valid layouts with a pool set - if (in->inode.layout.pool_id == -1 || !in->inode.layout.is_valid()) { + if (in->get_inode()->layout.pool_id == -1 || + !in->get_inode()->layout.is_valid()) { dout(0) << "EMetaBlob.replay invalid layout on ino " << *in - << ": " << in->inode.layout << dendl; + << ": " << in->get_inode()->layout << dendl; std::ostringstream oss; oss << "Invalid layout for inode " << in->ino() << " in journal"; mds->clog->error() << oss.str(); @@ -847,7 +865,7 @@ void 
EMetaBlob::get_inodes( // Record inodes of fullbits for (const auto& iter : dl.get_dfull()) { - inodes.insert(iter.inode.ino); + inodes.insert(iter.inode->ino); } // Record inodes of remotebits @@ -922,7 +940,7 @@ void EMetaBlob::get_paths( for (const auto& iter : dl.get_dfull()) { std::string_view dentry = iter.dn; children[dir_ino].emplace_back(dentry); - ino_locations[iter.inode.ino] = Location(dir_ino, dentry); + ino_locations[iter.inode->ino] = Location(dir_ino, dentry); } for (const auto& iter : dl.get_dremote()) { @@ -948,7 +966,7 @@ void EMetaBlob::get_paths( for (const auto& iter : dl.get_dfull()) { std::string_view dentry = iter.dn; - if (children.find(iter.inode.ino) == children.end()) { + if (children.find(iter.inode->ino) == children.end()) { leaf_locations.push_back(Location(dir_ino, dentry)); } } @@ -1081,7 +1099,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) ceph_assert(g_conf()->mds_kill_journal_replay_at != 1); for (auto& p : roots) { - CInode *in = mds->mdcache->get_inode(p.inode.ino); + CInode *in = mds->mdcache->get_inode(p.inode->ino); bool isnew = in ? 
false:true; if (!in) in = new CInode(mds->mdcache, false, 2, CEPH_NOSNAP); @@ -1209,7 +1227,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) if (lump.is_importing()) dn->state_set(CDentry::STATE_AUTH); - CInode *in = mds->mdcache->get_inode(fb.inode.ino, fb.dnlast); + CInode *in = mds->mdcache->get_inode(fb.inode->ino, fb.dnlast); if (!in) { in = new CInode(mds->mdcache, dn->is_auth(), fb.dnfirst, fb.dnlast); fb.update_inode(mds, in); @@ -1219,7 +1237,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) unlinked[dn->get_linkage()->get_inode()] = dir; stringstream ss; ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn - << " " << *dn->get_linkage()->get_inode() << " should be " << fb.inode.ino; + << " " << *dn->get_linkage()->get_inode() << " should be " << in->ino(); dout(0) << ss.str() << dendl; mds->clog->warn(ss); } @@ -1243,7 +1261,7 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) unlinked[dn->get_linkage()->get_inode()] = dir; stringstream ss; ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn - << " " << *dn->get_linkage()->get_inode() << " should be " << fb.inode.ino; + << " " << *dn->get_linkage()->get_inode() << " should be " << in->ino(); dout(0) << ss.str() << dendl; mds->clog->warn(ss); } diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 6c3038588602b..86fc0156522b8 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -275,9 +275,11 @@ void inline_data_t::decode(bufferlist::const_iterator &p) decode(version, p); uint32_t inline_len; decode(inline_len, p); - if (inline_len > 0) - ceph::decode_nohead(inline_len, get_data(), p); - else + if (inline_len > 0) { + ceph::buffer::list bl; + decode_nohead(inline_len, bl, p); + set_data(bl); + } else free_data(); } diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index b41ecb3e5dc7b..78d8405ac9d43 100644 --- a/src/mds/mdstypes.h +++ 
b/src/mds/mdstypes.h @@ -20,7 +20,6 @@ #include "include/frag.h" #include "include/xlist.h" #include "include/interval_set.h" -#include "include/compact_map.h" #include "include/compact_set.h" #include "include/fs_types.h" @@ -397,12 +396,12 @@ public: inline_data_t() {} inline_data_t(const inline_data_t& o) : version(o.version) { if (o.blp) - get_data() = *o.blp; + set_data(*o.blp); } inline_data_t& operator=(const inline_data_t& o) { version = o.version; if (o.blp) - get_data() = *o.blp; + set_data(*o.blp); else free_data(); return *this; @@ -411,10 +410,16 @@ public: void free_data() { blp.reset(); } - ceph::buffer::list& get_data() { + void get_data(ceph::buffer::list& ret) const { + if (blp) + ret = *blp; + else + ret.clear(); + } + void set_data(const ceph::buffer::list& bl) { if (!blp) blp.reset(new ceph::buffer::list); - return *blp; + *blp = bl; } size_t length() const { return blp ? blp->length() : 0; } @@ -1007,11 +1012,11 @@ template class Allocator> using alloc_string = std::basic_string,Allocator>; template class Allocator> -using xattr_map = compact_map, - ceph::bufferptr, - std::less>, - Allocator, - ceph::bufferptr>>>; // FIXME bufferptr not in mempool +using xattr_map = std::map, + ceph::bufferptr, + std::less>, + Allocator, + ceph::bufferptr>>>; // FIXME bufferptr not in mempool template class Allocator> inline void decode_noshare(xattr_map& xattrs, ceph::buffer::list::const_iterator &p) diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h index 09f304ae34803..93ff1267dacdc 100644 --- a/src/messages/MMDSCacheRejoin.h +++ b/src/messages/MMDSCacheRejoin.h @@ -188,14 +188,14 @@ public: } void add_inode_locks(CInode *in, __u32 nonce, ceph::buffer::list& bl) { using ceph::encode; - encode(in->inode.ino, inode_locks); + encode(in->ino(), inode_locks); encode(in->last, inode_locks); encode(nonce, inode_locks); encode(bl, inode_locks); } void add_inode_base(CInode *in, uint64_t features) { using ceph::encode; - 
encode(in->inode.ino, inode_base); + encode(in->ino(), inode_base); encode(in->last, inode_base); ceph::buffer::list bl; in->_encode_base(bl, features); diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 1698fdd7e4f4b..d279f283c2dd2 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -341,43 +341,43 @@ int MetadataDriver::inject_unlinked_inode( } // Compose - InodeStore inode; - inode.inode.ino = inono; - inode.inode.version = 1; - inode.inode.xattr_version = 1; - inode.inode.mode = 0500 | mode; + InodeStore inode_data; + auto inode = inode_data.get_inode(); + inode->ino = inono; + inode->version = 1; + inode->xattr_version = 1; + inode->mode = 0500 | mode; // Fake dirstat.nfiles to 1, so that the directory doesn't appear to be empty // (we won't actually give the *correct* dirstat here though) - inode.inode.dirstat.nfiles = 1; + inode->dirstat.nfiles = 1; - inode.inode.ctime = - inode.inode.mtime = ceph_clock_now(); - inode.inode.nlink = 1; - inode.inode.truncate_size = -1ull; - inode.inode.truncate_seq = 1; - inode.inode.uid = g_conf()->mds_root_ino_uid; - inode.inode.gid = g_conf()->mds_root_ino_gid; + inode->ctime = inode->mtime = ceph_clock_now(); + inode->nlink = 1; + inode->truncate_size = -1ull; + inode->truncate_seq = 1; + inode->uid = g_conf()->mds_root_ino_uid; + inode->gid = g_conf()->mds_root_ino_gid; // Force layout to default: should we let users override this so that // they don't have to mount the filesystem to correct it? 
- inode.inode.layout = file_layout_t::get_default(); - inode.inode.layout.pool_id = data_pool_id; - inode.inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + inode->layout = file_layout_t::get_default(); + inode->layout.pool_id = data_pool_id; + inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; // Assume that we will get our stats wrong, and that we may // be ignoring dirfrags that exist - inode.damage_flags |= (DAMAGE_STATS | DAMAGE_RSTATS | DAMAGE_FRAGTREE); + inode_data.damage_flags |= (DAMAGE_STATS | DAMAGE_RSTATS | DAMAGE_FRAGTREE); if (inono == MDS_INO_ROOT || MDS_INO_IS_MDSDIR(inono)) { sr_t srnode; srnode.seq = 1; - encode(srnode, inode.snap_blob); + encode(srnode, inode_data.snap_blob); } // Serialize bufferlist inode_bl; encode(std::string(CEPH_FS_ONDISK_MAGIC), inode_bl); - inode.encode(inode_bl, CEPH_FEATURES_SUPPORTED_DEFAULT); + inode_data.encode(inode_bl, CEPH_FEATURES_SUPPORTED_DEFAULT); // Write r = metadata_io.write_full(oid.name, inode_bl); @@ -922,9 +922,9 @@ int DataScan::scan_links() bool is_dir; map snaps; link_info_t() : version(0), nlink(0), is_dir(false) {} - link_info_t(inodeno_t di, frag_t df, const string& n, const CInode::mempool_inode& i) : + link_info_t(inodeno_t di, frag_t df, const string& n, const CInode::inode_const_ptr& i) : dirino(di), frag(df), name(n), - version(i.version), nlink(i.nlink), is_dir(S_IFDIR & i.mode) {} + version(i->version), nlink(i->nlink), is_dir(S_IFDIR & i->mode) {} dirfrag_t dirfrag() const { return dirfrag_t(dirino, frag); } @@ -992,7 +992,7 @@ int DataScan::scan_links() if (dentry_type == 'I') { InodeStore inode; inode.decode_bare(q); - inodeno_t ino = inode.inode.ino; + inodeno_t ino = inode.inode->ino; if (step == SCAN_INOS) { if (used_inos.contains(ino, 1)) { @@ -1020,9 +1020,10 @@ int DataScan::scan_links() snaprealm_v2_since = last + 1; } } - if (!inode.old_inodes.empty()) { - if (inode.old_inodes.rbegin()->first > last_snap) - last_snap = inode.old_inodes.rbegin()->first; 
+ if (inode.old_inodes && !inode.old_inodes->empty()) { + auto _last_snap = inode.old_inodes->rbegin()->first; + if (_last_snap > last_snap) + last_snap = _last_snap; } auto q = dup_primaries.find(ino); if (q != dup_primaries.end()) { @@ -1035,9 +1036,9 @@ int DataScan::scan_links() nlink = r->second; if (!MDS_INO_IS_STRAY(dir_ino)) nlink++; - if (inode.inode.nlink != nlink) { + if (inode.inode->nlink != nlink) { derr << "Bad nlink on " << ino << " expected " << nlink - << " has " << inode.inode.nlink << dendl; + << " has " << inode.inode->nlink << dendl; bad_nlink_inos[ino] = link_info_t(dir_ino, frag_id, dname, inode.inode); bad_nlink_inos[ino].nlink = nlink; } @@ -1187,10 +1188,10 @@ int DataScan::scan_links() return r; } - if (inode.inode.ino != p.first || inode.inode.version != p.second.version) + if (inode.inode->ino != p.first || inode.inode->version != p.second.version) continue; - inode.inode.nlink = p.second.nlink; + inode.get_inode()->nlink = p.second.nlink; r = metadata_driver->inject_linkage(p.second.dirino, p.second.name, p.second.frag, inode, first); if (r < 0) return r; @@ -1562,7 +1563,7 @@ int MetadataDriver::inject_lost_and_found( return r; } } else { - if (!(lf_ino.inode.mode & S_IFDIR)) { + if (!(lf_ino.inode->mode & S_IFDIR)) { derr << "lost+found exists but is not a directory!" << dendl; // In this case we error out, and the user should do something about // this problem. 
@@ -1575,13 +1576,10 @@ int MetadataDriver::inject_lost_and_found( return r; } - InodeStore recovered_ino; - - const std::string dname = lost_found_dname(ino); // Write dentry into lost+found dirfrag - return inject_linkage(lf_ino.inode.ino, dname, frag_t(), dentry); + return inject_linkage(lf_ino.inode->ino, dname, frag_t(), dentry); } @@ -1649,9 +1647,9 @@ int MetadataDriver::get_frag_of( r = read_dentry(parent_ino, frag_t(), parent_dname, &existing_dentry); if (r >= 0) { // Great, fast path: return the fragtree from here - if (existing_dentry.inode.ino != dirino) { + if (existing_dentry.inode->ino != dirino) { dout(4) << "Unexpected inode in dentry! 0x" << std::hex - << existing_dentry.inode.ino + << existing_dentry.inode->ino << " vs expected 0x" << dirino << std::dec << dendl; return -ENOENT; } @@ -1793,7 +1791,7 @@ int MetadataDriver::inject_with_backtrace( break; } else { // Dentry already present, does it link to me? - if (existing_dentry.inode.ino == ino) { + if (existing_dentry.inode->ino == ino) { dout(20) << "Dentry 0x" << std::hex << parent_ino << std::dec << "/" << dname << " already exists and points to me" << dendl; @@ -1801,7 +1799,7 @@ int MetadataDriver::inject_with_backtrace( derr << "Dentry 0x" << std::hex << parent_ino << std::dec << "/" << dname << " already exists but points to 0x" - << std::hex << existing_dentry.inode.ino << std::dec << dendl; + << std::hex << existing_dentry.inode->ino << std::dec << dendl; // Fall back to lost+found! 
return inject_lost_and_found(backtrace.ino, dentry); } @@ -1815,28 +1813,29 @@ int MetadataDriver::inject_with_backtrace( // This is the linkage for the file of interest dout(10) << "Linking inode 0x" << std::hex << ino << " at 0x" << parent_ino << "/" << dname << std::dec - << " with size=" << dentry.inode.size << " bytes" << dendl; + << " with size=" << dentry.inode->size << " bytes" << dendl; r = inject_linkage(parent_ino, dname, fragment, dentry); } else { // This is the linkage for an ancestor directory InodeStore ancestor_dentry; - ancestor_dentry.inode.mode = 0755 | S_IFDIR; + auto inode = ancestor_dentry.get_inode(); + inode->mode = 0755 | S_IFDIR; // Set nfiles to something non-zero, to fool any other code // that tries to ignore 'empty' directories. This won't be // accurate, but it should avoid functional issues. - ancestor_dentry.inode.dirstat.nfiles = 1; - ancestor_dentry.inode.dir_layout.dl_dir_hash = - g_conf()->mds_default_dir_hash; + inode->dirstat.nfiles = 1; + inode->dir_layout.dl_dir_hash = + g_conf()->mds_default_dir_hash; - ancestor_dentry.inode.nlink = 1; - ancestor_dentry.inode.ino = ino; - ancestor_dentry.inode.uid = g_conf()->mds_root_ino_uid; - ancestor_dentry.inode.gid = g_conf()->mds_root_ino_gid; - ancestor_dentry.inode.version = 1; - ancestor_dentry.inode.backtrace_version = 1; + inode->nlink = 1; + inode->ino = ino; + inode->uid = g_conf()->mds_root_ino_uid; + inode->gid = g_conf()->mds_root_ino_gid; + inode->version = 1; + inode->backtrace_version = 1; r = inject_linkage(parent_ino, dname, fragment, ancestor_dentry); } @@ -1968,7 +1967,7 @@ int MetadataDriver::inject_linkage( } else { dout(20) << "Injected dentry 0x" << std::hex << dir_ino << "/" << dname << " pointing to 0x" - << inode.inode.ino << std::dec << dendl; + << inode.inode->ino << std::dec << dendl; return 0; } } @@ -2064,8 +2063,8 @@ int LocalFileDriver::inject_with_backtrace( if (is_file) { // FIXME: inject_data won't cope with interesting (i.e. 
striped) // layouts (need a librados-compatible Filer to read these) - inject_data(path_builder, dentry.inode.size, - dentry.inode.layout.object_size, bt.ino); + inject_data(path_builder, dentry.inode->size, + dentry.inode->layout.object_size, bt.ino); } else { int r = mkdir(path_builder.c_str(), 0755); if (r != 0 && r != -EPERM) { @@ -2092,8 +2091,8 @@ int LocalFileDriver::inject_lost_and_found( } std::string file_path = lf_path + "/" + lost_found_dname(ino); - return inject_data(file_path, dentry.inode.size, - dentry.inode.layout.object_size, ino); + return inject_data(file_path, dentry.inode->size, + dentry.inode->layout.object_size, ino); } int LocalFileDriver::init_roots(int64_t data_pool_id) @@ -2137,26 +2136,27 @@ void MetadataTool::build_file_dentry( { ceph_assert(out != NULL); - out->inode.mode = 0500 | S_IFREG; - out->inode.size = file_size; - out->inode.max_size_ever = file_size; - out->inode.mtime.tv.tv_sec = file_mtime; - out->inode.atime.tv.tv_sec = file_mtime; - out->inode.ctime.tv.tv_sec = file_mtime; + auto inode = out->get_inode(); + inode->mode = 0500 | S_IFREG; + inode->size = file_size; + inode->max_size_ever = file_size; + inode->mtime.tv.tv_sec = file_mtime; + inode->atime.tv.tv_sec = file_mtime; + inode->ctime.tv.tv_sec = file_mtime; - out->inode.layout = layout; + inode->layout = layout; - out->inode.truncate_seq = 1; - out->inode.truncate_size = -1ull; + inode->truncate_seq = 1; + inode->truncate_size = -1ull; - out->inode.inline_data.version = CEPH_INLINE_NONE; + inode->inline_data.version = CEPH_INLINE_NONE; - out->inode.nlink = 1; - out->inode.ino = ino; - out->inode.version = 1; - out->inode.backtrace_version = 1; - out->inode.uid = g_conf()->mds_root_ino_uid; - out->inode.gid = g_conf()->mds_root_ino_gid; + inode->nlink = 1; + inode->ino = ino; + inode->version = 1; + inode->backtrace_version = 1; + inode->uid = g_conf()->mds_root_ino_uid; + inode->gid = g_conf()->mds_root_ino_gid; } void MetadataTool::build_dir_dentry( @@ -2165,25 
+2165,26 @@ void MetadataTool::build_dir_dentry( { ceph_assert(out != NULL); - out->inode.mode = 0755 | S_IFDIR; - out->inode.dirstat = fragstat; - out->inode.mtime.tv.tv_sec = fragstat.mtime; - out->inode.atime.tv.tv_sec = fragstat.mtime; - out->inode.ctime.tv.tv_sec = fragstat.mtime; + auto inode = out->get_inode(); + inode->mode = 0755 | S_IFDIR; + inode->dirstat = fragstat; + inode->mtime.tv.tv_sec = fragstat.mtime; + inode->atime.tv.tv_sec = fragstat.mtime; + inode->ctime.tv.tv_sec = fragstat.mtime; - out->inode.layout = layout; - out->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; + inode->layout = layout; + inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash; - out->inode.truncate_seq = 1; - out->inode.truncate_size = -1ull; + inode->truncate_seq = 1; + inode->truncate_size = -1ull; - out->inode.inline_data.version = CEPH_INLINE_NONE; + inode->inline_data.version = CEPH_INLINE_NONE; - out->inode.nlink = 1; - out->inode.ino = ino; - out->inode.version = 1; - out->inode.backtrace_version = 1; - out->inode.uid = g_conf()->mds_root_ino_uid; - out->inode.gid = g_conf()->mds_root_ino_gid; + inode->nlink = 1; + inode->ino = ino; + inode->version = 1; + inode->backtrace_version = 1; + inode->uid = g_conf()->mds_root_ino_uid; + inode->gid = g_conf()->mds_root_ino_gid; } diff --git a/src/tools/cephfs/JournalTool.cc b/src/tools/cephfs/JournalTool.cc index f6d7c4116354c..0c9683de6084e 100644 --- a/src/tools/cephfs/JournalTool.cc +++ b/src/tools/cephfs/JournalTool.cc @@ -838,9 +838,9 @@ int JournalTool::recover_dentries( InodeStore inode; inode.decode_bare(q); dout(4) << "decoded embedded inode version " - << inode.inode.version << " vs fullbit version " - << fb.inode.version << dendl; - if (inode.inode.version < fb.inode.version) { + << inode.inode->version << " vs fullbit version " + << fb.inode->version << dendl; + if (inode.inode->version < fb.inode->version) { write_dentry = true; } } else { @@ -862,7 +862,7 @@ int 
JournalTool::recover_dentries( // Record for writing to RADOS write_vals[key] = dentry_bl; - consumed_inos->insert(fb.inode.ino); + consumed_inos->insert(fb.inode->ino); } } @@ -996,7 +996,7 @@ int JournalTool::recover_dentries( * of directories */ for (const auto& fb : metablob.roots) { - inodeno_t ino = fb.inode.ino; + inodeno_t ino = fb.inode->ino; dout(4) << "updating root 0x" << std::hex << ino << std::dec << dendl; object_t root_oid = InodeStore::get_object_name(ino, frag_t(), ".inode"); @@ -1020,7 +1020,7 @@ int JournalTool::recover_dentries( dout(4) << "magic ok" << dendl; old_inode.decode(inode_bl_iter); - if (old_inode.inode.version < fb.inode.version) { + if (old_inode.inode->version < fb.inode->version) { write_root_ino = true; } } else { @@ -1035,7 +1035,7 @@ int JournalTool::recover_dentries( if (write_root_ino && !dry_run) { dout(4) << "writing root ino " << root_oid.name - << " version " << fb.inode.version << dendl; + << " version " << fb.inode->version << dendl; // Compose: root ino format is magic,InodeStore(bare=false) bufferlist new_root_ino_bl; diff --git a/src/tools/cephfs/MetaTool.cc b/src/tools/cephfs/MetaTool.cc index 128cfcad5a0bf..b9c79d489d0ec 100644 --- a/src/tools/cephfs/MetaTool.cc +++ b/src/tools/cephfs/MetaTool.cc @@ -425,7 +425,7 @@ int MetaTool::_show_fn(inode_meta_t& inode_meta, const string& fn) f->open_object_section("fnodes"); for (const auto &frag : frags) { bufferlist hbl; - string oid = obj_name(inode_meta.get_meta()->inode.ino, frag); + string oid = obj_name(inode_meta.get_meta()->inode->ino, frag); int ret = io_meta.omap_get_header(oid, &hbl); if (ret < 0) { std::cerr << __func__ << " : can't find oid("<< oid << ")" << std::endl; @@ -967,10 +967,10 @@ int MetaTool::show_child(std::string_view key, f->close_section(); f->flush(ds); - if (sp_ino > 0 && op != NULL && sp_ino == inode_data.inode.ino) { + if (sp_ino > 0 && op != NULL && sp_ino == inode_data.inode->ino) { inode_meta_t* tmp = new inode_meta_t(first, type, 
&inode_data); - op->inodes[inode_data.inode.ino] = tmp; - op->okeys[inode_data.inode.ino] = key.data(); + op->inodes[inode_data.inode->ino] = tmp; + op->okeys[inode_data.inode->ino] = key.data(); return 1; } else { delete &inode_data;