From f13e6a4bd797c61cafe3d346d3652bafaacc9b49 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 23 Feb 2011 13:01:08 -0800 Subject: [PATCH] mds: refragment dirs when inode dirfragtree updates from journal Force dir fragmentation specified by dirfragtree when replayed from the journal. Example: mds0 is auth for /foo, mds1 is auth for /foo/bar. mds1 fragments /foo/bar. journals etc. mds0 gets fragment notify and the in-memory inode's dirfragtree changes. mds0 journals the /foo/bar inode for some random reason. mds0 imports /foo/bar. On replay, mds0 refragments upon first mention of the new fragtree in the journal, so that the dirfragtree <-> dir frags always match. Confusion is avoided when we, say, import /foo/bar. Signed-off-by: Sage Weil --- src/include/frag.h | 7 +++++++ src/mds/CInode.cc | 21 +++++++++++++++++++++ src/mds/CInode.h | 1 + src/mds/events/EMetaBlob.h | 19 +------------------ src/mds/journal.cc | 24 ++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 18 deletions(-) diff --git a/src/include/frag.h b/src/include/frag.h index 14afb948781b3..866c60b4d4cc7 100644 --- a/src/include/frag.h +++ b/src/include/frag.h @@ -495,6 +495,13 @@ public: }; WRITE_CLASS_ENCODER(fragtree_t) +inline bool operator==(const fragtree_t& l, const fragtree_t& r) { + return l._splits == r._splits; +} +inline bool operator!=(const fragtree_t& l, const fragtree_t& r) { + return l._splits != r._splits; +} + inline std::ostream& operator<<(std::ostream& out, fragtree_t& ft) { out << "fragtree_t("; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 4ecdddd85bcf1..772024db6bb9c 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -473,6 +473,27 @@ void CInode::verify_dirfrags() assert(!bad); } +void CInode::force_dirfrags() +{ + bool bad = false; + for (map::iterator p = dirfrags.begin(); p != dirfrags.end(); ++p) { + if (!dirfragtree.is_leaf(p->first)) { + dout(0) << "have open dirfrag " << p->first << " but not leaf in " << dirfragtree + << ": " << *p->second 
<< dendl; + bad = true; + } + } + + if (bad) { + list leaves; + dirfragtree.get_leaves(leaves); + for (list::iterator p = leaves.begin(); p != leaves.end(); ++p) + mdcache->get_force_dirfrag(dirfrag_t(ino(),*p)); + } + + verify_dirfrags(); +} + CDir *CInode::get_approx_dirfrag(frag_t fg) { CDir *dir = get_dirfrag(fg); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 2cf264c627c51..cb3242a5864fd 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -380,6 +380,7 @@ public: void close_dirfrags(); bool has_subtree_root_dirfrag(); + void force_dirfrags(); void verify_dirfrags(); void get_stickydirs(); diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index eacbafb7d0e18..9ab124fa5e527 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -133,24 +133,7 @@ public: ::decode(dirty, bl); } - void update_inode(CInode *in) { - in->inode = inode; - in->xattrs = xattrs; - if (in->inode.is_dir()) { - in->dirfragtree = dirfragtree; - delete in->default_layout; - in->default_layout = dir_layout; - dir_layout = NULL; - /* - * we can do this before linking hte inode bc the split_at would - * be a no-op.. 
we have no children (namely open snaprealms) to - * divy up - */ - in->decode_snap_blob(snapbl); - } else if (in->inode.is_symlink()) { - in->symlink = symlink; - } - } + void update_inode(CInode *in); void print(ostream& out) { out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv diff --git a/src/mds/journal.cc b/src/mds/journal.cc index eb4fbd8407a44..e25521c1aca0b 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -360,6 +360,30 @@ void EMetaBlob::update_segment(LogSegment *ls) // ls->last_client_tid[client_reqs.rbegin()->client] = client_reqs.rbegin()->tid); } +void EMetaBlob::fullbit::update_inode(CInode *in) +{ + in->inode = inode; + in->xattrs = xattrs; + if (in->inode.is_dir()) { + if (!(in->dirfragtree == dirfragtree)) { + in->dirfragtree = dirfragtree; + in->force_dirfrags(); + } + + delete in->default_layout; + in->default_layout = dir_layout; + dir_layout = NULL; + /* + * we can do this before linking the inode because the split_at would + * be a no-op.. we have no children (namely open snaprealms) to + * divvy up + */ + in->decode_snap_blob(snapbl); + } else if (in->inode.is_symlink()) { + in->symlink = symlink; + } +} + void EMetaBlob::replay(MDS *mds, LogSegment *logseg) { dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl; -- 2.39.5