From a5bc3f80e12199e33d16b13edeeea887d9053709 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 13 Jan 2009 16:33:44 -0800 Subject: [PATCH] mds: project dentry linkage --- src/mds/CDentry.cc | 45 +++++++++++---- src/mds/CDentry.h | 94 ++++++++++++++++++++++--------- src/mds/CDir.cc | 40 +++++++------ src/mds/CInode.cc | 45 +++++++++++---- src/mds/CInode.h | 23 +++++--- src/mds/Locker.cc | 8 +++ src/mds/Locker.h | 1 + src/mds/MDCache.cc | 134 +++++++++++++++++++++++--------------------- src/mds/MDCache.h | 3 +- src/mds/Migrator.cc | 4 +- src/mds/Server.cc | 111 ++++++++++++++---------------------- src/mds/journal.cc | 2 +- 12 files changed, 303 insertions(+), 207 deletions(-) diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index 7a56e4984fa08..e1a1941644822 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -119,8 +119,8 @@ void CDentry::print(ostream& out) inodeno_t CDentry::get_ino() { - if (inode) - return inode->ino(); + if (get_inode()) + return get_inode()->ino(); return inodeno_t(); } @@ -259,21 +259,46 @@ void CDentry::make_anchor_trace(vector& trace, CInode *in) void CDentry::link_remote(CInode *in) { assert(is_remote()); - assert(in->ino() == remote_ino); + assert(in->ino() == get_remote_ino()); - inode = in; + linkage.inode = in; in->add_remote_parent(this); } void CDentry::unlink_remote() { assert(is_remote()); - assert(inode); + assert(linkage.inode); - inode->remove_remote_parent(this); - inode = 0; + linkage.inode->remove_remote_parent(this); + linkage.inode = 0; } +void CDentry::push_projected_linkage(CInode *inode) +{ + _project_linkage()->inode = inode; + inode->push_projected_parent(this); +} + +void CDentry::pop_projected_linkage() +{ + assert(projected.size()); + + linkage_t& n = projected.front(); + if (n.inode) { + dir->link_primary_inode(this, n.inode); + n.inode->pop_projected_parent(); + } else if (n.remote_ino) + dir->link_remote_inode(this, n.remote_ino, n.remote_d_type); + + assert(n.inode == linkage.inode); + assert(n.remote_ino == linkage.remote_ino); + assert(n.remote_d_type == linkage.remote_d_type); + + projected.pop_front(); +} + + // ---------------------------- // auth pins @@ -364,7 +389,7 @@ void CDentry::decode_replica(bufferlist::iterator& p, bool is_new) if (is_null()) dir->link_remote_inode(this, rino, rdtype); else - assert(is_remote() && remote_ino == rino); + assert(is_remote() && linkage.remote_ino == rino); } __s32 ls; @@ -394,12 +419,12 @@ void CDentry::encode_lock_state(int type, bufferlist& bl) if (is_primary()) { c = 1; ::encode(c, bl); - ::encode(inode->inode.ino, bl); + ::encode(get_inode()->inode.ino, bl); } else if (is_remote()) { c = 2; ::encode(c, bl); - ::encode(remote_ino, bl); + ::encode(get_remote_ino(), bl); } else if (is_null()) { // encode nothing. diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index 3dbeb14a8511c..3e192f1ffab8c 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -86,14 +86,36 @@ public: return dentry_key_t(last, name.c_str()); } +public: + struct linkage_t { + CInode *inode; + inodeno_t remote_ino; + unsigned char remote_d_type; + + linkage_t() : inode(0), remote_ino(0), remote_d_type(0) {} + + // dentry type is primary || remote || null + // inode ptr is required for primary, optional for remote, undefined for null + bool is_primary() { return remote_ino == 0 && inode != 0; } + bool is_remote() { return remote_ino > 0; } + bool is_null() { return (remote_ino == 0 && inode == 0) ? true:false; } + + CInode *get_inode() { return inode; } + inodeno_t get_remote_ino() { return remote_ino; } + unsigned char get_remote_d_type() { return remote_d_type; } + + void set_remote(inodeno_t ino, unsigned char d_type) { + remote_ino = ino; + remote_d_type = d_type; + inode = 0; + } + }; + protected: - inodeno_t remote_ino; // if remote dentry - unsigned char remote_d_type; - - CInode *inode; // linked inode (if any) - CInode *projected_inode; // projected inode (if any) CDir *dir; // containing dirfrag - + linkage_t linkage; + list projected; + version_t version; // dir version when last touched. version_t projected_version; // what it will be when i unlock/commit. @@ -116,15 +138,13 @@ public: // lock SimpleLock lock; - public: // cons CDentry(const nstring& n, snapid_t f, snapid_t l) : name(n), first(f), last(l), - remote_ino(0), remote_d_type(0), - inode(0), projected_inode(0), dir(0), + dir(0), version(0), projected_version(0), xlist_dirty(this), auth_pins(0), nested_auth_pins(0), nested_anchors(0), @@ -133,27 +153,51 @@ public: snapid_t f, snapid_t l) : name(n), first(f), last(l), - remote_ino(ino), remote_d_type(dt), - inode(0), projected_inode(0), dir(0), + dir(0), version(0), projected_version(0), xlist_dirty(this), auth_pins(0), nested_auth_pins(0), nested_anchors(0), - lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET, 0) { } + lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET, 0) { + linkage.remote_ino = ino; + linkage.remote_d_type = dt; + } - CInode *get_inode() const { return inode; } CDir *get_dir() const { return dir; } const nstring& get_name() const { return name; } + + CInode *get_inode() const { return linkage.inode; } + inodeno_t get_remote_ino() { return linkage.remote_ino; } + unsigned char get_remote_d_type() { return linkage.remote_d_type; } + inodeno_t get_ino(); - inodeno_t get_remote_ino() { return remote_ino; } - unsigned char get_remote_d_type() { return remote_d_type; } - void set_remote(inodeno_t ino, unsigned char d_type) { - remote_ino = ino; - remote_d_type = d_type; + linkage_t *get_linkage() { return &linkage; } + + linkage_t *_project_linkage() { + projected.push_back(linkage_t()); + return &projected.back(); + } + void push_projected_linkage() { + _project_linkage(); } + void push_projected_linkage(inodeno_t ino, char d_type) { + linkage_t *p = _project_linkage(); + p->remote_ino = ino; + p->remote_d_type = d_type; + } + void push_projected_linkage(CInode *inode); + void pop_projected_linkage(); + + bool is_projected() { return projected.size(); } - CInode *get_projected_inode() const { return projected_inode; } - void set_projected_inode(CInode *i) { assert(!projected_inode); projected_inode = i; } + linkage_t *get_projected_linkage() { + if (projected.size()) + return &projected.back(); + return &linkage; + } + CInode *get_projected_inode() { + return get_projected_linkage()->inode; + } // ref counts: pin ourselves in the LRU when we're pinned. void first_get() { @@ -174,9 +218,9 @@ public: // dentry type is primary || remote || null // inode ptr is required for primary, optional for remote, undefined for null - bool is_primary() { return remote_ino == 0 && inode != 0; } - bool is_remote() { return remote_ino > 0; } - bool is_null() { return (remote_ino == 0 && inode == 0) ? true:false; } + bool is_primary() { return linkage.is_primary(); } + bool is_remote() { return linkage.is_remote(); } + bool is_null() { return linkage.is_null(); } // remote links void link_remote(CInode *in); @@ -212,8 +256,8 @@ public: __u32 nonce = add_replica(mds); ::encode(nonce, bl); ::encode(first, bl); - ::encode(remote_ino, bl); - ::encode(remote_d_type, bl); + ::encode(linkage.remote_ino, bl); + ::encode(linkage.remote_d_type, bl); __s32 ls = lock.get_replica_state(); ::encode(ls, bl); } diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 5be844b4e6f52..ac36a862bfa51 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -263,7 +263,11 @@ CDentry* CDir::add_primary_dentry(const nstring& dname, CInode *in, //assert(null_items.count(dn->name) == 0); items[dn->key()] = dn; - link_inode_work( dn, in ); + + dn->get_linkage()->inode = in; + in->set_primary_parent(dn); + + link_inode_work(dn, in); dout(12) << "add_primary_dentry " << *dn << dendl; @@ -318,7 +322,7 @@ void CDir::remove_dentry(CDentry *dn) // there should be no client leases at this point! assert(dn->client_lease_map.empty()); - if (dn->inode) { + if (dn->get_inode()) { // detach inode and dentry unlink_inode_work(dn); } else { @@ -358,7 +362,8 @@ void CDir::link_remote_inode(CDentry *dn, inodeno_t ino, unsigned char d_type) dout(12) << "link_remote_inode " << *dn << " remote " << ino << dendl; assert(dn->is_null()); - dn->set_remote(ino, d_type); + dn->get_linkage()->set_remote(ino, d_type); + if (dn->last == CEPH_NOSNAP) { num_head_items++; num_head_null--; @@ -374,7 +379,10 @@ void CDir::link_primary_inode(CDentry *dn, CInode *in) dout(12) << "link_primary_inode " << *dn << " " << *in << dendl; assert(dn->is_null()); - link_inode_work(dn,in); + dn->get_linkage()->inode = in; + in->set_primary_parent(dn); + + link_inode_work(dn, in); if (dn->last == CEPH_NOSNAP) num_head_null--; @@ -386,10 +394,8 @@ void CDir::link_primary_inode(CDentry *dn, CInode *in) void CDir::link_inode_work( CDentry *dn, CInode *in) { - assert(dn->inode == 0); - dn->inode = in; - dn->projected_inode = 0; - in->set_primary_parent(dn); + assert(dn->get_inode() == in); + assert(in->get_parent_dn() == dn); if (dn->last == CEPH_NOSNAP) num_head_items++; @@ -420,7 +426,7 @@ void CDir::unlink_inode( CDentry *dn ) if (dn->is_remote()) { dout(12) << "unlink_inode " << *dn << dendl; } else { - dout(12) << "unlink_inode " << *dn << " " << *dn->inode << dendl; + dout(12) << "unlink_inode " << *dn << " " << *dn->get_inode() << dendl; } unlink_inode_work(dn); @@ -432,6 +438,7 @@ void CDir::unlink_inode( CDentry *dn ) assert(get_num_any() == items.size()); } + void CDir::try_remove_unlinked_dn(CDentry *dn) { assert(dn->dir == this); @@ -468,19 +475,18 @@ void CDir::try_remove_unlinked_dn(CDentry *dn) } } } - void CDir::unlink_inode_work( CDentry *dn ) { - CInode *in = dn->inode; + CInode *in = dn->get_inode(); if (dn->is_remote()) { // remote if (in) dn->unlink_remote(); - dn->set_remote(0, 0); + dn->get_linkage()->set_remote(0, 0); } else { // primary assert(dn->is_primary()); @@ -498,7 +504,7 @@ void CDir::unlink_inode_work( CDentry *dn ) // detach inode in->remove_primary_parent(dn); - dn->inode = 0; + dn->get_linkage()->inode = 0; } if (dn->last == CEPH_NOSNAP) @@ -540,8 +546,8 @@ void CDir::purge_stale_snap_data(const set& snaps) if (p == snaps.end() || *p > dn->last) { dout(10) << " purging " << *dn << dendl; - if (dn->is_primary() && dn->inode->is_dirty()) - dn->inode->mark_clean(); + if (dn->is_primary() && dn->get_inode()->is_dirty()) + dn->get_inode()->mark_clean(); remove_dentry(dn); } } @@ -1415,8 +1421,8 @@ void CDir::_commit(version_t want) set::const_iterator p = snaps->lower_bound(dn->first); if (p == snaps->end() || *p > dn->last) { dout(10) << " purging " << *dn << dendl; - if (dn->is_primary() && dn->inode->is_dirty()) - dn->inode->mark_clean(); + if (dn->is_primary() && dn->get_inode()->is_dirty()) + dn->get_inode()->mark_clean(); remove_dentry(dn); continue; } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 1092cf65ac749..6871e17f889b1 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -51,8 +51,9 @@ ostream& CInode::print_db_line_prefix(ostream& out) ostream& operator<<(ostream& out, CInode& in) { - filepath path; - in.make_path(path); + string path; + in.make_path_string_projected(path); + out << "[inode " << in.inode.ino; out << " [" << (in.is_multiversion() ? "...":"") @@ -418,10 +419,13 @@ bool CInode::is_ancestor_of(CInode *other) return false; } -void CInode::make_path_string(string& s) +void CInode::make_path_string(string& s, bool force, CDentry *use_parent) { - if (parent) { - parent->make_path_string(s); + if (!force) + use_parent = parent; + + if (use_parent) { + use_parent->make_path_string(s); } else if (is_root()) { s = ""; // root @@ -433,7 +437,28 @@ void CInode::make_path_string(string& s) s += n; } else { - s = "(dangling)"; // dangling + char n[20]; + sprintf(n, "#%llx", (unsigned long long)(ino())); + s += n; + } +} +void CInode::make_path_string_projected(string& s) +{ + make_path_string(s); + + if (projected_parent.size()) { + string q; + q.swap(s); + s = "{" + q; + for (list::iterator p = projected_parent.begin(); + p != projected_parent.end(); + p++) { + string q; + make_path_string(q, true, *p); + s += " "; + s += q; + } + s += "}"; } } @@ -467,10 +492,10 @@ void CInode::name_stray_dentry(string& dname) version_t CInode::pre_dirty() { - assert(parent || projected_parent); + assert(parent || projected_parent.size()); version_t pv; - if (projected_parent) - pv = projected_parent->pre_dirty(get_projected_version()); + if (projected_parent.size()) + pv = projected_parent.front()->pre_dirty(get_projected_version()); else pv = parent->pre_dirty(); dout(10) << "pre_dirty " << pv << " (current v " << inode.version << ")" << dendl; @@ -494,7 +519,7 @@ void CInode::mark_dirty(version_t pv, LogSegment *ls) { dout(10) << "mark_dirty " << *this << dendl; - assert(parent || projected_parent); + assert(parent || projected_parent.size()); /* NOTE: I may already be dirty, but this fn _still_ needs to be called so that diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 0dbde44deebab..f4fda0347e8f6 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -230,7 +230,8 @@ public: // parent dentries in cache CDentry *parent; // primary link set remote_parents; // if hard linked - CDentry *projected_parent; // for in-progress rename + + list projected_parent; // for in-progress rename, (un)link, etc. pair inode_auth; @@ -286,7 +287,7 @@ private: last_journaled(0), last_open_journaled(0), //hack_accessed(true), stickydir_ref(0), - parent(0), projected_parent(0), + parent(0), inode_auth(CDIR_AUTH_DEFAULT), replica_caps_wanted(0), xlist_dirty(this), xlist_caps(this), xlist_open_file(this), @@ -342,7 +343,7 @@ private: inode_t& get_inode() { return inode; } CDentry* get_parent_dn() { return parent; } - CDentry* get_projected_parent_dn() { return projected_parent ? projected_parent:parent; } + CDentry* get_projected_parent_dn() { return projected_parent.size() ? projected_parent.back():parent; } CDir *get_parent_dir(); CInode *get_parent_inode(); @@ -358,7 +359,8 @@ private: // -- misc -- bool is_ancestor_of(CInode *other); - void make_path_string(string& s); + void make_path_string(string& s, bool force=false, CDentry *use_parent=NULL); + void make_path_string_projected(string& s); void make_path(filepath& s); void make_anchor_trace(vector& trace); void name_stray_dentry(string& dname); @@ -757,10 +759,6 @@ public: void set_primary_parent(CDentry *p) { assert(parent == 0); parent = p; - if (projected_parent) { - assert(projected_parent == p); - projected_parent = 0; - } } void remove_primary_parent(CDentry *dn) { assert(dn == parent); @@ -772,6 +770,15 @@ public: return remote_parents.size(); } + void push_projected_parent(CDentry *dn) { + projected_parent.push_back(dn); + } + void pop_projected_parent() { + assert(projected_parent.size()); + parent = projected_parent.front(); + projected_parent.pop_front(); + } + void print(ostream& out); }; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 9dc987631a40c..d336b84c822a5 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -342,6 +342,14 @@ bool Locker::acquire_locks(MDRequest *mdr, } +void Locker::set_xlocks_done(Mutation *mut) +{ + for (set::iterator p = mut->xlocks.begin(); + p != mut->xlocks.end(); + p++) + (*p)->set_xlock_done(); +} + void Locker::drop_locks(Mutation *mut) { // leftover locks diff --git a/src/mds/Locker.h b/src/mds/Locker.h index a81475dbed277..8c7d583faf46f 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -83,6 +83,7 @@ public: set &xlocks); void drop_locks(Mutation *mut); + void set_xlocks_done(Mutation *mut); void eval_gather(SimpleLock *lock); void eval_cap_gather(CInode *in); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 9030a4fd5fe66..c1723f2b8ecbe 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1051,9 +1051,9 @@ void MDCache::journal_cow_dentry(Mutation *mut, EMetaBlob *metablob, CDentry *dn // nothing to cow on a null dentry, fix caller assert(!dn->is_null()); - if (dn->is_primary() && dn->inode->is_multiversion()) { + if (dn->is_primary() && dn->get_inode()->is_multiversion()) { // multiversion inode. - CInode *in = dn->inode; + CInode *in = dn->get_inode(); if (follows == CEPH_NOSNAP) follows = in->find_snaprealm()->get_newest_snap(); @@ -1093,8 +1093,8 @@ void MDCache::journal_cow_dentry(Mutation *mut, EMetaBlob *metablob, CDentry *dn dout(10) << " dn " << *dn << dendl; if (dn->is_primary()) { - assert(oldfirst == dn->inode->first); - CInode *oldin = cow_inode(dn->inode, follows); + assert(oldfirst == dn->get_inode()->first); + CInode *oldin = cow_inode(dn->get_inode(), follows); mut->add_cow_inode(oldin); if (pcow_inode) *pcow_inode = oldin; @@ -1104,7 +1104,7 @@ void MDCache::journal_cow_dentry(Mutation *mut, EMetaBlob *metablob, CDentry *dn mut->add_cow_dentry(olddn); } else { assert(dn->is_remote()); - CDentry *olddn = dn->dir->add_remote_dentry(dn->name, dn->remote_ino, dn->remote_d_type, + CDentry *olddn = dn->dir->add_remote_dentry(dn->name, dn->get_remote_ino(), dn->get_remote_d_type(), oldfirst, follows); dout(10) << " olddn " << *olddn << dendl; metablob->add_remote_dentry(olddn, true); @@ -2352,23 +2352,23 @@ void MDCache::recalc_auth_bits() if (dn->is_primary()) { // inode if (auth) - dn->inode->state_set(CInode::STATE_AUTH); + dn->get_inode()->state_set(CInode::STATE_AUTH); else { - dn->inode->state_set(CInode::STATE_REJOINING); - dn->inode->state_clear(CInode::STATE_AUTH); - if (dn->inode->is_dirty()) - dn->inode->mark_clean(); + dn->get_inode()->state_set(CInode::STATE_REJOINING); + dn->get_inode()->state_clear(CInode::STATE_AUTH); + if (dn->get_inode()->is_dirty()) + dn->get_inode()->mark_clean(); // avoid touching scatterlocks for our subtree roots! - if (subtree_inodes.count(dn->inode) == 0) { - dn->inode->filelock.clear_updated(); - dn->inode->nestlock.clear_updated(); - dn->inode->dirfragtreelock.clear_updated(); + if (subtree_inodes.count(dn->get_inode()) == 0) { + dn->get_inode()->filelock.clear_updated(); + dn->get_inode()->nestlock.clear_updated(); + dn->get_inode()->dirfragtreelock.clear_updated(); } } // recurse? - if (dn->inode->is_dir()) - dn->inode->get_nested_dirfrags(dfq); + if (dn->get_inode()->is_dir()) + dn->get_inode()->get_nested_dirfrags(dfq); } } } @@ -2611,7 +2611,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) CDentry *dn = p->second; dout(15) << " add_weak_primary_dentry " << *dn << dendl; assert(dn->is_primary()); - assert(dn->inode->is_dir()); + assert(dn->get_inode()->is_dir()); rejoin->add_weak_primary_dentry(dir->dirfrag(), dn->name.c_str(), dn->first, dn->last, dn->get_inode()->ino()); dn->get_inode()->get_nested_dirfrags(nested); } @@ -3829,7 +3829,7 @@ void MDCache::rejoin_send_acks() if (!dn->is_primary()) continue; // inode - CInode *in = dn->inode; + CInode *in = dn->get_inode(); for (map::iterator r = in->replicas_begin(); r != in->replicas_end(); @@ -4031,7 +4031,7 @@ void MDCache::_recovered(CInode *in, int r) file_recovering.erase(in); in->state_clear(CInode::STATE_RECOVERING); - if (!in->parent && !in->projected_parent) { + if (!in->get_parent_dn() && !in->get_projected_parent_dn()) { dout(10) << " inode has no parents, killing it off" << dendl; in->auth_unpin(this); remove_inode(in); @@ -5170,8 +5170,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who vector& trace, // result snapid_t *psnapid, CInode **psnapdiri, bool follow_trailing_symlink, // how - int onfail, - bool allow_projected) + int onfail) { assert(mdr || req); bool null_okay = (onfail == MDS_TRAVERSE_DISCOVERXLOCK); @@ -5311,24 +5310,25 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who CDentry *dn = curdir->lookup(path[depth], snapid); bool use_projected = false; - if (allow_projected && - dn && - dn->is_null() && - dn->lock.is_xlocked() && - dn->lock.can_rdlock(mdr, client) && - dn->projected_inode) - use_projected = true; + CDentry::linkage_t *dnl = NULL; + if (dn) { + if (dn->lock.is_xlocked() && + dn->lock.can_rdlock(mdr, client)) { + use_projected = true; + dnl = dn->get_projected_linkage(); + } else + dnl = dn->get_linkage(); + } // null and last_bit and xlocked by me? - if (dn && dn->is_null()) { + if (dnl && dnl->is_null()) { if (null_okay) { dout(10) << "traverse: hit null dentry at tail of traverse, succeeding" << dendl; trace.push_back(dn); break; // done! } if (dn->lock.is_xlocked() && - dn->lock.get_xlocked_by() != mdr && - !use_projected) { + dn->lock.get_xlocked_by() != mdr) { dout(10) << "traverse: xlocked null dentry at " << *dn << dendl; dn->lock.add_waiter(SimpleLock::WAIT_RD, _get_waiter(mdr, req)); if (mds->logger) mds->logger->inc("tlock"); @@ -5337,7 +5337,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who } - if (dn && (!dn->is_null() || use_projected)) { + if (dnl && !dnl->is_null()) { // dentry exists. xlocked? if (!noperm && dn->lock.is_xlocked() && @@ -5349,20 +5349,22 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who return 1; } - CInode *in = use_projected ? dn->projected_inode : dn->inode; + CInode *in = dnl->get_inode(); // do we have inode? if (!in) { - assert(dn->is_remote()); + assert(dnl->is_remote()); // do i have it? - in = get_inode(dn->get_remote_ino()); + in = get_inode(dnl->get_remote_ino()); if (in) { - dout(7) << "linking in remote in " << *in << dendl; - dn->link_remote(in); + if (!use_projected) { + dout(7) << "linking in remote in " << *in << dendl; + dn->link_remote(in); + } } else { dout(7) << "remote link to " << dn->get_remote_ino() << ", which i don't have" << dendl; assert(mdr); // we shouldn't hit non-primary dentries doing a non-mdr traversal! - open_remote_ino(dn->get_remote_ino(), _get_waiter(mdr, req)); + open_remote_ino(dnl->get_remote_ino(), _get_waiter(mdr, req)); if (mds->logger) mds->logger->inc("trino"); return 1; } @@ -5676,23 +5678,27 @@ void MDCache::open_remote_dirfrag(CInode *diri, frag_t approxfg, Context *fin) */ CInode *MDCache::get_dentry_inode(CDentry *dn, MDRequest *mdr, bool projected) { - if (projected && dn->projected_inode) - return dn->projected_inode; + CDentry::linkage_t *l; + if (projected) + l = dn->get_projected_linkage(); + else + l = dn->get_linkage(); - assert(!dn->is_null()); + assert(!l->is_null()); - if (dn->is_primary()) - return dn->inode; + if (l->is_primary()) + return l->inode; - assert(dn->is_remote()); - CInode *in = get_inode(dn->get_remote_ino()); + assert(l->is_remote()); + CInode *in = get_inode(l->get_remote_ino()); if (in) { dout(7) << "get_dentry_inode linking in remote in " << *in << dendl; - dn->link_remote(in); + if (!projected) + dn->link_remote(in); return in; } else { dout(10) << "get_dentry_inode on remote dn, opening inode for " << *dn << dendl; - open_remote_ino(dn->get_remote_ino(), new C_MDS_RetryRequest(this, mdr)); + open_remote_ino(l->remote_ino, new C_MDS_RetryRequest(this, mdr)); return 0; } } @@ -6445,7 +6451,7 @@ void MDCache::eval_stray(CDentry *dn) dout(10) << "eval_stray " << *dn << dendl; dout(10) << " inode is " << *dn->get_inode() << dendl; assert(dn->is_primary()); - CInode *in = dn->inode; + CInode *in = dn->get_inode(); assert(in); if (!dn->is_auth()) return; // has to be mine @@ -6515,7 +6521,7 @@ public: void MDCache::purge_stray(CDentry *dn) { - CInode *in = dn->inode; + CInode *in = dn->get_inode(); dout(10) << "purge_stray " << *dn << " " << *in << dendl; assert(!dn->is_replicated()); @@ -6557,7 +6563,7 @@ void MDCache::_purge_stray_purged(CDentry *dn) { dout(10) << "_purge_stray_purged " << *dn << dendl; - CInode *in = dn->inode; + CInode *in = dn->get_inode(); // kill dentry. version_t pdv = dn->pre_dirty(); @@ -6572,13 +6578,13 @@ void MDCache::_purge_stray_purged(CDentry *dn) void MDCache::_purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *ls) { - dout(10) << "_purge_stray_logged " << *dn << " " << *dn->inode << dendl; + dout(10) << "_purge_stray_logged " << *dn << " " << *dn->get_inode() << dendl; dn->state_clear(CDentry::STATE_PURGING); dn->put(CDentry::PIN_PURGING); // unlink and remove dentry - CInode *in = dn->inode; + CInode *in = dn->get_inode(); if (in->is_dirty()) in->mark_clean(); if (in->state_test(CInode::STATE_RECOVERING)) @@ -6616,7 +6622,7 @@ void MDCache::reintegrate_stray(CDentry *straydn, CDentry *rdn) void MDCache::migrate_stray(CDentry *dn, int from, int to) { dout(10) << "migrate_stray from mds" << from << " to mds" << to - << " " << *dn << " " << *dn->inode << dendl; + << " " << *dn << " " << *dn->get_inode() << dendl; // rename it to another mds. string dname; @@ -7060,16 +7066,16 @@ void MDCache::handle_discover(MDiscover *dis) } // frozen inode? - if (dn->is_primary() && dn->inode->is_frozen()) { + if (dn->is_primary() && dn->get_inode()->is_frozen()) { if (tailitem && dis->wants_xlocked()) { - dout(7) << "handle_discover allowing discovery of frozen tail " << *dn->inode << dendl; + dout(7) << "handle_discover allowing discovery of frozen tail " << *dn->get_inode() << dendl; } else if (reply->is_empty()) { - dout(7) << *dn->inode << " is frozen, empty reply, waiting" << dendl; - dn->inode->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryMessage(mds, dis)); + dout(7) << *dn->get_inode() << " is frozen, empty reply, waiting" << dendl; + dn->get_inode()->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryMessage(mds, dis)); delete reply; return; } else { - dout(7) << *dn->inode << " is frozen, non-empty reply, stopping" << dendl; + dout(7) << *dn->get_inode() << " is frozen, non-empty reply, stopping" << dendl; break; } } @@ -7083,7 +7089,7 @@ void MDCache::handle_discover(MDiscover *dis) if (!dn->is_primary()) break; // stop on null or remote link. // add inode - CInode *next = dn->inode; + CInode *next = dn->get_inode(); assert(next->is_auth()); replicate_inode(next, dis->get_asker(), reply->trace); @@ -7382,12 +7388,12 @@ CInode *MDCache::add_replica_inode(bufferlist::iterator& p, CDentry *dn, listdecode_replica(p, false); dout(10) << "add_replica_inode had " << *in << dendl; - assert(!dn || dn->inode == in); + assert(!dn || dn->get_inode() == in); } if (dn) { assert(dn->is_primary()); - assert(dn->inode == in); + assert(dn->get_inode() == in); dn->get_dir()->take_ino_waiting(in->ino(), finished); } @@ -7577,7 +7583,7 @@ void MDCache::handle_dentry_unlink(MDentryUnlink *m) // open inode? if (dn->is_primary()) { - CInode *in = dn->inode; + CInode *in = dn->get_inode(); dn->dir->unlink_inode(dn); assert(straydn); straydn->dir->link_primary_inode(straydn, in); @@ -8183,8 +8189,8 @@ void MDCache::show_cache() ++p) { CDentry *dn = p->second; dout(7) << " dentry " << *dn << dendl; - if (dn->is_primary() && dn->inode) - dout(7) << " inode " << *dn->inode << dendl; + if (dn->is_primary() && dn->get_inode()) + dout(7) << " inode " << *dn->get_inode() << dendl; } } } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index cec5e65ef5a79..cbde0ad6c9057 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -857,8 +857,7 @@ public: int path_traverse(MDRequest *mdr, Message *req, filepath& path, vector& trace, snapid_t *psnap, CInode **psnapdiri, bool follow_trailing_sym, - int onfail, - bool allow_projected=false); + int onfail); bool path_is_mine(filepath& path); bool path_is_mine(string& p) { filepath path(p); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 287cbd3616ce1..cbad72cf68ba4 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1994,8 +1994,8 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::iterator& blp, int o decode_import_inode_caps(in, blp, cap_imports); // link before state -- or not! -sage - if (dn->inode != in) { - assert(!dn->inode); + if (dn->get_inode() != in) { + assert(!dn->get_inode()); dn->dir->link_primary_inode(dn, in); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 78bb3460d5a6e..97ca4eab3dcf0 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -525,6 +525,9 @@ void Server::early_reply(MDRequest *mdr, CInode *tracei, CDentry *tracedn) return; } + // mark xlocks "done", indicating that we are exposing uncommitted changes + mds->locker->set_xlocks_done(mdr); + MClientRequest *req = mdr->client_request; entity_inst_t client_inst = req->get_orig_source_inst(); if (client_inst.name.is_mds()) @@ -663,7 +666,7 @@ void Server::encode_null_lease(bufferlist& bl) */ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn, snapid_t snapid, CInode *snapdiri, - bool projected, MDRequest *mdr) + bool expose_projected_inode, MDRequest *mdr) { // inode, dentry, dir, ..., inode bufferlist bl; @@ -694,7 +697,7 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C dout(10) << "set_trace_dist snaprealm " << *realm << dendl; } - in->encode_inodestat(bl, session, snapid, projected); + in->encode_inodestat(bl, session, snapid, expose_projected_inode); dout(20) << "set_trace_dist added snapid " << snapid << " " << *in << dendl; if (snapid != CEPH_NOSNAP && in == snapdiri) { @@ -717,11 +720,9 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C } if (!dn) { - if (projected && mdr) { - dn = in->get_projected_parent_dn(); - if (dn && mdr->locks.count(&dn->lock) == 0) // only use projected value if we've locked it! - dn = NULL; - } + dn = in->get_projected_parent_dn(); + if (dn && dn->lock.is_xlocked() && !dn->lock.can_rdlock(0, client)) + dn = NULL; // can't use projected parent if (!dn) dn = in->get_parent_dn(); } @@ -729,6 +730,8 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C goto done; dentry: + expose_projected_inode = false; + ::encode(dn->get_name(), bl); if (snapid == CEPH_NOSNAP) lmask = mds->locker->issue_client_lease(dn, client, bl, now, session); @@ -1434,8 +1437,7 @@ CDir *Server::traverse_to_auth_dir(MDRequest *mdr, vector &trace, file snapid_t snapid; int r = mdcache->path_traverse(mdr, mdr->client_request, refpath, trace, &snapid, &mdr->ref_snapdiri, - false, MDS_TRAVERSE_FORWARD, - true); + false, MDS_TRAVERSE_FORWARD); if (r > 0) return 0; // delayed if (r < 0) { reply_request(mdr, r); @@ -1486,8 +1488,7 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, int r = mdcache->path_traverse(mdr, req, refpath, trace, &mdr->ref_snapid, &mdr->ref_snapdiri, - req->follow_trailing_symlink(), MDS_TRAVERSE_FORWARD, - true); + req->follow_trailing_symlink(), MDS_TRAVERSE_FORWARD); if (r > 0) return false; // delayed if (r < 0) { // error reply_request(mdr, r); @@ -2201,9 +2202,14 @@ void Server::handle_client_readdir(MDRequest *mdr) CDentry *dn = it->second; it++; - if (dn->is_null() && - !(dn->get_projected_inode() && - dn->lock.can_rdlock(mdr, client))) continue; + CDentry::linkage_t *dnl = 0; + if (dn->lock.can_rdlock(mdr, client)) + dnl = dn->get_projected_linkage(); + else + dnl = dn->get_linkage(); + + if (dnl->is_null()) + continue; if (snaps && dn->last != CEPH_NOSNAP) { set::const_iterator p = snaps->lower_bound(dn->first); if (p == snaps->end() || *p > dn->last) { @@ -2214,17 +2220,15 @@ void Server::handle_client_readdir(MDRequest *mdr) if (dn->last < snapid || dn->first > snapid) continue; - CInode *in = dn->get_inode(); - if (dn->get_projected_inode() && - dn->lock.can_rdlock(mdr, client)) - in = dn->get_projected_inode(); + CInode *in = dnl->get_inode(); // remote link? // better for the MDS to do the work, if we think the client will stat any of these files. - if (dn->is_remote() && !in) { - in = mdcache->get_inode(dn->get_remote_ino()); + if (dnl->is_remote() && !in) { + in = mdcache->get_inode(dnl->get_remote_ino()); if (in) { - dn->link_remote(in); + if (!dn->is_projected()) + dn->link_remote(in); } else if (dn->state_test(CDentry::STATE_BADREMOTEINO)) { dout(10) << "skipping bad remote ino on " << *dn << dendl; continue; @@ -2295,7 +2299,7 @@ public: assert(r == 0); // link the inode - dn->get_dir()->link_primary_inode(dn, newi); + dn->pop_projected_linkage(); // dirty inode, dn, dir newi->mark_dirty(newi->inode.version + 1, mdr->ls); @@ -2335,8 +2339,7 @@ void Server::handle_client_mknod(MDRequest *mdr) CInode *newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino)); assert(newi); - newi->projected_parent = dn; - dn->set_projected_inode(newi); + dn->push_projected_linkage(newi); newi->inode.rdev = req->head.args.mknod.rdev; newi->inode.mode = req->head.args.mknod.mode; @@ -2359,9 +2362,6 @@ void Server::handle_client_mknod(MDRequest *mdr) PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, newi); - // allow the same client rdlock|lease the dentry - dn->lock.set_xlock_done(); - early_reply(mdr, newi, 0); // log + wait @@ -2390,8 +2390,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) assert(newi); // it's a directory. - newi->projected_parent = dn; - dn->set_projected_inode(newi); + dn->push_projected_linkage(newi); newi->inode.mode = req->head.args.mkdir.mode; newi->inode.mode &= ~S_IFMT; @@ -2434,9 +2433,6 @@ void Server::handle_client_mkdir(MDRequest *mdr) LogSegment *ls = mds->mdlog->get_current_segment(); ls->open_files.push_back(&newi->xlist_open_file); - // allow the same client rdlock|lease the dentry - dn->lock.set_xlock_done(); - early_reply(mdr, newi, 0); // log + wait @@ -2462,8 +2458,7 @@ void Server::handle_client_symlink(MDRequest *mdr) assert(newi); // it's a symlink - newi->projected_parent = dn; - dn->set_projected_inode(newi); + dn->push_projected_linkage(newi); newi->inode.mode &= ~S_IFMT; newi->inode.mode |= S_IFLNK; @@ -2484,9 +2479,6 @@ void Server::handle_client_symlink(MDRequest *mdr) mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, newi); - // allow the same client rdlock|lease the dentry - dn->lock.set_xlock_done(); - early_reply(mdr, newi, 0); // log + wait @@ -2523,7 +2515,7 @@ void Server::handle_client_link(MDRequest *mdr) vector targettrace; int r = mdcache->path_traverse(mdr, req, targetpath, targettrace, NULL, NULL, - false, MDS_TRAVERSE_DISCOVER, true); + false, MDS_TRAVERSE_DISCOVER); if (r > 0) return; // wait if (targettrace.empty()) r = -EINVAL; if (r < 0) { @@ -2641,8 +2633,6 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti) le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type()); // new remote mdcache->journal_dirty_inode(mdr, &le->metablob, targeti); - dn->lock.set_xlock_done(); - early_reply(mdr, targeti, dn); mdlog->submit_entry(le, new C_MDS_link_local_finish(mds, mdr, dn, targeti, dnpv, tipv)); @@ -2654,7 +2644,7 @@ void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti, dout(10) << "_link_local_finish " << *dn << " to " << *targeti << dendl; // link and unlock the NEW dentry - dn->get_dir()->link_remote_inode(dn, targeti); + dn->pop_projected_linkage(); dn->mark_dirty(dnpv, mdr->ls); // target inode @@ -2748,8 +2738,6 @@ void Server::_link_remote(MDRequest *mdr, bool inc, CDentry *dn, CInode *targeti // mark committing (needed for proper recovery) mdr->committing = true; - dn->lock.set_xlock_done(); - early_reply(mdr, targeti, dn); // log + wait @@ -2766,7 +2754,7 @@ void Server::_link_remote_finish(MDRequest *mdr, bool inc, if (inc) { // link the new dentry - dn->get_dir()->link_remote_inode(dn, targeti); + dn->pop_projected_linkage(); dn->mark_dirty(dpv, mdr->ls); } else { // unlink main dentry @@ -3236,8 +3224,9 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn, CDentry *straydn) EUpdate *le = new EUpdate(mdlog, "unlink_local"); le->metablob.add_client_req(mdr->reqid); - if (dn->is_primary()) - dn->get_inode()->projected_parent = straydn; + if (dn->is_primary()) { + straydn->push_projected_linkage(dn->get_inode()); + } // the unlinked dentry dn->pre_dirty(); @@ -3276,8 +3265,6 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn, CDentry *straydn) if (mdr->more()->dst_reanchor_atid) le->metablob.add_table_transaction(TABLE_ANCHOR, mdr->more()->dst_reanchor_atid); - dn->lock.set_xlock_done(); - early_reply(mdr, 0, dn); // log + wait @@ -3291,13 +3278,12 @@ void Server::_unlink_local_finish(MDRequest *mdr, dout(10) << "_unlink_local_finish " << *dn << dendl; // unlink main dentry - CInode *in = dn->get_inode(); dn->get_dir()->unlink_inode(dn); // relink as stray? (i.e. was primary link?) if (straydn) { dout(20) << " straydn is " << *straydn << dendl; - straydn->get_dir()->link_primary_inode(straydn, in); + straydn->pop_projected_linkage(); SnapRealm *oldparent = dn->get_dir()->inode->find_snaprealm(); @@ -3783,9 +3769,6 @@ void Server::handle_client_rename(MDRequest *mdr) // mark committing (needed for proper recovery) mdr->committing = true; - srcdn->lock.set_xlock_done(); - destdn->lock.set_xlock_done(); - early_reply(mdr, srci, destdn); // log + wait @@ -3913,7 +3896,7 @@ void Server::_rename_prepare(MDRequest *mdr, tpi = destdn->get_inode()->project_inode(); tpi->version = straydn->pre_dirty(tpi->version); } - destdn->get_inode()->projected_parent = straydn; + straydn->push_projected_linkage(destdn->get_inode()); } else if (destdn->is_remote()) { // nlink-- targeti if (destdn->get_inode()->is_auth()) { @@ -3949,8 +3932,7 @@ void Server::_rename_prepare(MDRequest *mdr, pi = srcdn->get_inode()->project_inode(); pi->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv); } - srcdn->get_inode()->projected_parent = destdn; - destdn->set_projected_inode(srcdn->get_inode()); + destdn->push_projected_linkage(srcdn->get_inode()); } // src @@ -4092,7 +4074,7 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen assert(straydn); dout(10) << "straydn is " << *straydn << dendl; destdn->get_dir()->unlink_inode(destdn); - straydn->get_dir()->link_primary_inode(straydn, oldin); + straydn->pop_projected_linkage(); if (straydn->is_auth()) { SnapRealm *oldparent = destdn->get_dir()->inode->find_snaprealm(); @@ -4121,7 +4103,7 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen if (srcdn->is_remote()) { if (!linkmerge) { srcdn->get_dir()->unlink_inode(srcdn); - destdn->get_dir()->link_remote_inode(destdn, in); + destdn->pop_projected_linkage(); destdn->link_remote(in); if (destdn->is_auth()) destdn->mark_dirty(mdr->more()->pvmap[destdn], mdr->ls); @@ -4137,7 +4119,7 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen destdn->get_dir()->unlink_inode(destdn); } srcdn->get_dir()->unlink_inode(srcdn); - destdn->get_dir()->link_primary_inode(destdn, in); + destdn->pop_projected_linkage(); // srcdn inode import? if (!srcdn->is_auth() && destdn->is_auth()) { @@ -5068,16 +5050,13 @@ public: void finish(int r) { assert(r == 0); - dn->get_dir()->link_primary_inode(dn, newi); + dn->pop_projected_linkage(); // dirty inode, dn, dir newi->mark_dirty(newi->inode.version + 1, mdr->ls); mdr->apply(); - // downgrade xlock to rdlock - //mds->locker->dentry_xlock_downgrade_to_rdlock(dn, mdr); - // set/pin ref inode for open() mdr->ref = newi; mdr->ref_snapid = CEPH_NOSNAP; @@ -5129,8 +5108,7 @@ void Server::handle_client_openc(MDRequest *mdr) assert(in); // it's a file. - in->projected_parent = dn; - dn->set_projected_inode(in); + dn->push_projected_linkage(in); in->inode.mode = req->head.args.open.mode; in->inode.mode |= S_IFREG; @@ -5165,9 +5143,6 @@ void Server::handle_client_openc(MDRequest *mdr) LogSegment *ls = mds->mdlog->get_current_segment(); ls->open_files.push_back(&in->xlist_open_file); - // early reply? - dn->lock.set_xlock_done(); - early_reply(mdr, in, 0); // log + wait diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 85c5b05d890fa..82b12d3a2ac76 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -449,7 +449,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) dout(10) << "EMetaBlob.replay unlinking " << *dn << dendl; dir->unlink_inode(dn); } - dn->set_remote(p->ino, p->d_type); + dn->get_linkage()->set_remote(p->ino, p->d_type); dn->set_version(p->dnv); if (p->dirty) dn->_mark_dirty(logseg); dout(10) << "EMetaBlob.replay for [" << p->dnfirst << "," << p->dnlast << "] had " << *dn << dendl; -- 2.39.5