From fb3bf789f4d6cffa3ec316bcc72c2a23b8812e86 Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Thu, 20 Feb 2025 13:26:30 +0530 Subject: [PATCH] mds/link: Create referent inode and store backtrace On hardlink creation, create a referent inode (CInode) and store backtrace for the hardlink. This patch takes care of single mds. Fixes: https://tracker.ceph.com/issues/54205 Signed-off-by: Kotresh HR --- src/mds/Locker.cc | 6 ++-- src/mds/Server.cc | 70 +++++++++++++++++++++++++++++++++-------- src/mds/Server.h | 4 +-- src/mds/StrayManager.cc | 4 +-- 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 2c4f0e1722694..8ef4bed9ec7fc 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -423,7 +423,7 @@ bool Locker::acquire_locks(const MDRequestRef& mdr, if (mdr->lock_cache) { // debug ceph_assert(mdr->lock_cache->opcode == CEPH_MDS_OP_UNLINK); CDentry *dn = mdr->dn[0].back(); - ceph_assert(dn->get_projected_linkage()->is_remote()); + ceph_assert(dn->get_projected_linkage()->is_remote() || dn->get_projected_linkage()->is_referent_remote()); } if (object->is_ambiguous_auth()) { @@ -459,7 +459,7 @@ bool Locker::acquire_locks(const MDRequestRef& mdr, { // debug ceph_assert(mdr->lock_cache->opcode == CEPH_MDS_OP_UNLINK); CDentry *dn = mdr->dn[0].back(); - ceph_assert(dn->get_projected_linkage()->is_remote()); + ceph_assert(dn->get_projected_linkage()->is_remote() || dn->get_projected_linkage()->is_referent_remote()); } } @@ -4511,7 +4511,7 @@ void Locker::issue_client_lease(CDentry *dn, CInode *in, const MDRequestRef& mdr ceph_assert(dnl->get_inode() == in); mask = CEPH_LEASE_PRIMARY_LINK; } else { - if (dnl->is_remote()) + if (dnl->is_remote() || dnl->is_referent_remote()) ceph_assert(dnl->get_remote_ino() == in->ino()); else ceph_assert(!in); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 2ea3559731aa3..387c403254651 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3528,7 +3528,7 @@ CDentry* Server::prepare_stray_dentry(const MDRequestRef& mdr, CInode *in) * create a new inode. set c/m/atime. hit dir pop. */ CInode* Server::prepare_new_inode(const MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode, - const file_layout_t *layout) + const file_layout_t *layout, bool referent_inode) { CInode *in = new CInode(mdcache); auto _inode = in->_get_inode(); @@ -3620,12 +3620,25 @@ CInode* Server::prepare_new_inode(const MDRequestRef& mdr, CDir *dir, inodeno_t _inode->mode |= S_ISGID; } } else { - _inode->gid = mdr->client_request->get_owner_gid(); - ceph_assert(_inode->gid != (unsigned)-1); + if (!referent_inode) { + _inode->gid = mdr->client_request->get_owner_gid(); + ceph_assert(_inode->gid != (unsigned)-1); + } + } + + if (!referent_inode) { + _inode->uid = mdr->client_request->get_owner_uid(); + ceph_assert(_inode->uid != (unsigned)-1); } - _inode->uid = mdr->client_request->get_owner_uid(); - ceph_assert(_inode->uid != (unsigned)-1); + /* The referent inode is created on hardlink, so the client request wouldn't + * have uid, gid set. So don't use uid and gid from client if it's a referent + * inode. + */ + if (referent_inode) { + _inode->gid = 0; + _inode->uid = 0; + } _inode->btime = _inode->ctime = _inode->mtime = _inode->atime = mdr->get_op_stamp(); @@ -7702,17 +7715,18 @@ void Server::handle_client_link(const MDRequestRef& mdr) class C_MDS_link_local_finish : public ServerLogContext { CDentry *dn; CInode *targeti; + CInode *referenti; version_t dnpv; version_t tipv; bool adjust_realm; public: - C_MDS_link_local_finish(Server *s, const MDRequestRef& r, CDentry *d, CInode *ti, + C_MDS_link_local_finish(Server *s, const MDRequestRef& r, CDentry *d, CInode *ti, CInode *ri, version_t dnpv_, version_t tipv_, bool ar) : - ServerLogContext(s, r), dn(d), targeti(ti), + ServerLogContext(s, r), dn(d), targeti(ti), referenti(ri), dnpv(dnpv_), tipv(tipv_), adjust_realm(ar) { } void finish(int r) override { ceph_assert(r == 0); - server->_link_local_finish(mdr, dn, targeti, dnpv, tipv, adjust_realm); + server->_link_local_finish(mdr, dn, targeti, referenti, dnpv, tipv, adjust_realm); } }; @@ -7736,6 +7750,24 @@ void Server::_link_local(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, pi.inode->change_attr++; pi.inode->version = tipv; + // create referent inode. Don't re-create on retry + CInode *newi = nullptr; + if (mds->mdsmap->allow_referent_inodes()) { + if (!mdr->alloc_ino && !mdr->used_prealloc_ino) + newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(0), pi.inode->mode, nullptr, true); + else + newi = mdcache->get_inode(mdr->alloc_ino ? mdr->alloc_ino : mdr->used_prealloc_ino); + ceph_assert(newi); + + auto _inode = newi->_get_inode(); + _inode->version = dnpv; + _inode->update_backtrace(); + + /* NOTE: layout, rstat accounting and snapshot related inode updates are not + * required and hence not done for referent inodes. + */ + } + bool adjust_realm = false; if (!target_realm->get_subvolume_ino() && !targeti->is_projected_snaprealm_global()) { sr_t *newsnap = targeti->project_snaprealm(); @@ -7749,18 +7781,24 @@ void Server::_link_local(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, dn->get_dir(), PREDIRTY_DIR, 1); // new dn mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, 0, PREDIRTY_PRIMARY); // targeti - // TODO: Pass referent inode upon creation. It's adding just remote dentry now - le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type(), 0, nullptr); // new remote + dout(20) << __func__ << " calling metablob add_remote_dentry with referent_ino= " << (newi ? newi->ino() : inodeno_t(0)) << dendl; + le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type(), newi ? newi->ino() : inodeno_t(0), newi); // new remote mdcache->journal_dirty_inode(mdr.get(), &le->metablob, targeti); // do this after predirty_*, to avoid funky extra dnl arg - dn->push_projected_linkage(targeti->ino(), targeti->d_type()); + if (newi) { + // journal allocated referent inode and push the linkage with referent inode + journal_allocated_inos(mdr, &le->metablob); + dn->push_projected_linkage(newi, targeti->ino(), newi->ino()); + } else { + dn->push_projected_linkage(targeti->ino(), targeti->d_type()); + } journal_and_reply(mdr, targeti, dn, le, - new C_MDS_link_local_finish(this, mdr, dn, targeti, dnpv, tipv, adjust_realm)); + new C_MDS_link_local_finish(this, mdr, dn, targeti, newi, dnpv, tipv, adjust_realm)); } -void Server::_link_local_finish(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, +void Server::_link_local_finish(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, CInode *referenti, version_t dnpv, version_t tipv, bool adjust_realm) { dout(10) << "_link_local_finish " << *dn << " to " << *targeti << dendl; @@ -7771,6 +7809,12 @@ void Server::_link_local_finish(const MDRequestRef& mdr, CDentry *dn, CInode *ta dn->link_remote(dnl, targeti); dn->mark_dirty(dnpv, mdr->ls); + if (referenti) { + // dirty referent inode + referenti->mark_dirty(mdr->ls); + referenti->mark_dirty_parent(mdr->ls, true); + } + // target inode mdr->apply(); diff --git a/src/mds/Server.h b/src/mds/Server.h index bc1a15d35a6e6..ac09c00ffa44c 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -209,7 +209,7 @@ public: bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid); CDentry *prepare_stray_dentry(const MDRequestRef& mdr, CInode *in); CInode* prepare_new_inode(const MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode, - const file_layout_t *layout=nullptr); + const file_layout_t *layout=nullptr, bool referent_inode=false); void journal_allocated_inos(const MDRequestRef& mdr, EMetaBlob *blob); void apply_allocated_inos(const MDRequestRef& mdr, Session *session); @@ -279,7 +279,7 @@ public: // link void handle_client_link(const MDRequestRef& mdr); void _link_local(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, SnapRealm *target_realm); - void _link_local_finish(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, + void _link_local_finish(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, CInode *referenti, version_t, version_t, bool); void _link_remote(const MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti); diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index 48d4b1e509234..4cc64cffd2260 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -583,7 +583,7 @@ void StrayManager::eval_remote(CDentry *remote_dn) dout(10) << __func__ << " " << *remote_dn << dendl; CDentry::linkage_t *dnl = remote_dn->get_projected_linkage(); - ceph_assert(dnl->is_remote()); + ceph_assert(dnl->is_remote() || dnl->is_referent_remote()); CInode *in = dnl->get_inode(); if (!in) { @@ -614,7 +614,7 @@ class C_RetryEvalRemote : public StrayManagerContext { dn->get(CDentry::PIN_PTRWAITER); } void finish(int r) override { - if (dn->get_projected_linkage()->is_remote()) + if (dn->get_projected_linkage()->is_remote() || dn->get_projected_linkage()->is_referent_remote()) sm->eval_remote(dn); dn->put(CDentry::PIN_PTRWAITER); } -- 2.39.5