From: Patrick Donnelly Date: Wed, 19 Apr 2017 02:28:39 +0000 (-0400) Subject: mds: prevent export of pinned inodes X-Git-Tag: v12.0.3~38^2~22 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=df340e820d0fe964a4e8a71a86aaa92cad25b91f;p=ceph.git mds: prevent export of pinned inodes This adds a chain of linked lists to CInode which can be followed to CInodes that are export pinned to this rank. Signed-off-by: Patrick Donnelly --- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 92c2735d2c74..d551f1837e1e 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -4433,10 +4433,15 @@ void CInode::set_export_pin(mds_rank_t rank) assert(is_dir()); assert(is_projected()); get_projected_inode()->export_pin = rank; + if (rank == mdcache->mds->get_nodeid()) { + link_export_pin(); + } else { + unlink_export_pin(); + } maybe_export_pin(); } -mds_rank_t CInode::get_export_pin(void) const +mds_rank_t CInode::get_export_pin(bool inherit) const { /* An inode that is export pinned may not necessarily be a subtree root, we * need to traverse the parents. A base or system inode cannot be pinned. @@ -4448,6 +4453,8 @@ mds_rank_t CInode::get_export_pin(void) const if (pin >= 0) { return pin; } + if (!inherit) + break; } return MDS_RANK_NONE; } @@ -4463,3 +4470,40 @@ bool CInode::is_exportable(mds_rank_t dest) const return true; } } + +void CInode::unlink_export_pin(void) +{ + if (is_dir()) { + auto list = export_pin_parent_link.get_list(); + if (list) { + CDentry *entry = get_projected_parent_dn(); + assert(entry); + CInode *parent = entry->dir->inode; + assert(parent && list == &parent->export_pin_list); + export_pin_parent_link.remove_myself(); + if (list->empty()) { + parent->unlink_export_pin(); + } + } + } +} + +/* Because an inode may be pinned to us **but not a subtree**, it is necessary + * to be able to convert this inode (actually, its fragments) to subtrees if + * this inode's parent is exported. This chain of linked lists keeps track of + * pinned children. + */ +void CInode::link_export_pin(void) +{ + if (is_dir()) { + mds_rank_t pin = get_projected_inode()->export_pin; + if (pin >= 0 && pin == mdcache->mds->get_nodeid()) { + unlink_export_pin(); + CDentry *cde = get_projected_parent_dn(); + while (cde) { + cde->dir->inode->export_pin_list.push_front(&cde->get_projected_linkage()->inode->export_pin_parent_link); + cde = cde->dir->inode->get_projected_parent_dn(); + } + } + } +} diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 9ca709652f9b..fc9622d0303f 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -680,7 +680,8 @@ public: nestlock(this, &nestlock_type), flocklock(this, &flocklock_type), policylock(this, &policylock_type), - loner_cap(-1), want_loner_cap(-1) + loner_cap(-1), want_loner_cap(-1), + export_pin_parent_link(this) { state = 0; if (auth) state_set(STATE_AUTH); @@ -691,6 +692,8 @@ public: clear_file_locks(); assert(num_projected_xattrs == 0); assert(num_projected_srnodes == 0); + assert(export_pin_list.empty()); + unlink_export_pin(); } @@ -1067,9 +1070,12 @@ public: void set_primary_parent(CDentry *p) { assert(parent == 0); parent = p; + link_export_pin(); } void remove_primary_parent(CDentry *dn) { assert(dn == parent); + if (projected_parent.empty()) + unlink_export_pin(); parent = 0; } void add_remote_parent(CDentry *p); @@ -1080,6 +1086,7 @@ public: void push_projected_parent(CDentry *dn) { projected_parent.push_back(dn); + link_export_pin(); /* fix export pin links */ } void pop_projected_parent() { assert(projected_parent.size()); @@ -1091,9 +1098,16 @@ private: void maybe_export_pin(); public: void set_export_pin(mds_rank_t rank); - mds_rank_t get_export_pin(void) const; + mds_rank_t get_export_pin(bool inherit=true) const; bool is_exportable(mds_rank_t dest) const; +private: + xlist export_pin_list; + xlist::item export_pin_parent_link; +public: + void link_export_pin(void); + void unlink_export_pin(void); + void print(ostream& out) override; void dump(Formatter *f) const; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 56a208853e95..dfe914772e97 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -869,6 +869,25 @@ void MDCache::adjust_subtree_auth(CDir *dir, mds_authority_t auth, bool do_eval) show_subtrees(); } +void MDCache::split_export_pins(CInode *dir) +{ + assert(dir->is_dir()); + for (CInode *in : dir->export_pin_list) { + mds_rank_t export_pin = in->get_export_pin(false); + if (export_pin >= 0 && export_pin == mds->get_nodeid()) { + dout(7) << "splitting fragments of " << *in << dendl; + std::list ls; + in->get_dirfrags(ls); + for (auto &cd : ls) { + if (cd->is_full_dir_auth()) { + adjust_subtree_auth(cd, mds_authority_t(mds->get_nodeid(), CDIR_AUTH_UNKNOWN)); + } + } + } else { + split_export_pins(in); + } + } +} void MDCache::try_subtree_merge(CDir *dir) { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 558ab0c5aff5..7e7696407824 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1175,6 +1175,11 @@ public: void repair_dirfrag_stats(CDir *dir); public: + void split_export_pins(CDir *dir) { + return split_export_pins(dir->inode); + } + void split_export_pins(CInode *dir); + /* Because exports may fail, this set lets us keep track of inodes that need exporting. */ std::set export_pin_queue; }; diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index b0273ccd1422..9e43b8624e7c 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1035,6 +1035,10 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) // note the bounds. // force it into a subtree by listing auth as . cache->adjust_subtree_auth(dir, mds->get_nodeid(), mds->get_nodeid()); + + /* force export pinned children into separate subtrees */ + cache->split_export_pins(dir); + set bounds; cache->get_subtree_bounds(dir, bounds); @@ -1542,16 +1546,6 @@ uint64_t Migrator::encode_export_dir(bufferlist& exportbl, continue; } - /* XXX The inode may be pinned to me (in->get_inode().export_pin) but it is - * not a subtree by the time I've found it here. So, keeping it is - * difficult as we've already notified the importer of the subtree bounds - * (MExportDirPrep). Creating a new subtree for this pinned inode would - * probably require widespread changes and is not worth the effort since - * the importer will simply export this inode and its subtrees back to us - * (Migrator::decode_import_inode). This should be rare enough to not - * justify mucking with things here. - */ - // primary link // -- inode exportbl.append("I", 1); // inode dentry