During replay we trim non-auth inodes on EExport or EImportFinish abort.
Subtree trimming may be delayed, too.
When adding directory context, skip journaling the parent dentries if the
directory inode (diri) was journaled in the same blob, or if it was journaled
in the current segment *and* it is in a subtree that is unambiguously auth.
We can't easily be more precise than that because the actual event we care
about on replay is EExport, but the migrator doesn't twiddle auth bits to
false until later.
Also, reset last_journaled on import.
This fixes replay bugs like:
2011-04-13 18:15:18.064029 7f65588ef710 mds1.journal EImportStart.replay 10000000015 bounds []
2011-04-13 18:15:18.064034 7f65588ef710 mds1.journal EMetaBlob.replay 2 dirlumps by unknown0
2011-04-13 18:15:18.064040 7f65588ef710 mds1.journal EMetaBlob.replay dir 10000000010
2011-04-13 18:15:18.064046 7f65588ef710 mds1.journal EMetaBlob.replay missing dir ino 10000000010
mds/journal.cc: In function 'void EMetaBlob::replay(MDS*, LogSegment*)', in thread '0x7f65588ef710'
mds/journal.cc: 407: FAILED assert(0)
ceph version 0.25-683-g653580a (commit:653580ae84c471c34872f14a0308c78af71f7243)
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x53) [0xa53d26]
2: (EMetaBlob::replay(MDS*, LogSegment*)+0x7eb) [0x7a737d]
Fixes: #994
Signed-off-by: Sage Weil <sage@newdream.net>
void Migrator::decode_import_inode(CDentry *dn, bufferlist::iterator& blp, int oldauth,
- LogSegment *ls,
+ LogSegment *ls, uint64_t log_offset,
map<CInode*, map<client_t,Capability::Export> >& cap_imports,
list<ScatterLock*>& updated_scatterlocks)
{
// state after link -- or not! -sage
in->decode_import(blp, ls); // cap imports are noted for later action
+ // note that we are journaled at this log offset
+ in->last_journaled = log_offset;
+
// caps
decode_import_inode_caps(in, blp, cap_imports);
}
else if (icode == 'I') {
// inode
- decode_import_inode(dn, blp, oldauth, ls, cap_imports, updated_scatterlocks);
+ decode_import_inode(dn, blp, oldauth, ls, le->get_start_off(), cap_imports, updated_scatterlocks);
}
// add dentry to journal entry
public:
void decode_import_inode(CDentry *dn, bufferlist::iterator& blp, int oldauth,
- LogSegment *ls,
+ LogSegment *ls, uint64_t log_offset,
map<CInode*, map<client_t,Capability::Export> >& cap_imports,
list<ScatterLock*>& updated_scatterlocks);
void decode_import_inode_caps(CInode *in,
list<ScatterLock*> updated_scatterlocks; // we clear_updated explicitly below
mdcache->migrator->decode_import_inode(srcdn, blp,
srcdn->authority().first,
- mdr->ls,
+ mdr->ls, 0,
mdr->more()->cap_imports, updated_scatterlocks);
srcdnl->get_inode()->filelock.clear_dirty();
srcdnl->get_inode()->nestlock.clear_dirty();
static const int TO_AUTH_SUBTREE_ROOT = 0; // default.
static const int TO_ROOT = 1;
- void add_dir_context(CDir *dir, int mode = TO_AUTH_SUBTREE_ROOT) {
- // already have this dir? (we must always add in order)
- if (lump_map.count(dir->dirfrag()))
- return;
-
- if (mode == TO_AUTH_SUBTREE_ROOT) {
- //return; // hack: for comparison purposes.. what if NO context?
-
- // subtree root?
- if (dir->is_subtree_root() && dir->is_auth())
- return;
-
- // was the inode journaled since the last subtree_map?
- if (//false && // for benchmarking
- last_subtree_map &&
- dir->inode->last_journaled >= last_subtree_map) {
- /*
- cout << " inode " << dir->inode->inode.ino
- << " last journaled at " << dir->inode->last_journaled
- << " and last_subtree_map is " << last_subtree_map
- << std::endl;
- */
- return;
- }
- }
-
- // stop at root/stray
- CInode *diri = dir->get_inode();
- if (!diri->get_projected_parent_dn())
- return;
-
- // journaled?
-
- // add parent dn
- CDentry *parent = diri->get_projected_parent_dn();
- add_dir_context(parent->get_dir(), mode);
- add_dentry(parent, false);
- }
-
-
-
+ void add_dir_context(CDir *dir, int mode = TO_AUTH_SUBTREE_ROOT);
void print(ostream& out) const {
out << "[metablob";
my_offset(mdlog ? mdlog->get_write_pos() : 0) //, _segment(0)
{ }
+void EMetaBlob::add_dir_context(CDir *dir, int mode) // collect dir's ancestor dentries (walking up via projected parents) and add_dentry() each, outermost first
+{
+ MDS *mds = dir->cache->mds; // NOTE(review): not referenced directly below; presumably consumed by the dout macro -- confirm
+
+ list<CDentry*> parents; // dentries that will definitely be added
+
+ // candidates collected while maybenot is set; it may be okay not to include the maybe items, if
+ // - we journaled the maybe child inode in this segment (last_journaled >= last_subtree_map)
+ // - that subtree turns out to be unambiguously auth (the list is then cleared below)
+ list<CDentry*> maybe;
+ bool maybenot = false; // while true, newly visited parents go to maybe instead of parents
+
+ while (true) { // one directory per iteration, moving toward the root
+ // already have this dir? (we must always add in order)
+ if (lump_map.count(dir->dirfrag())) {
+ dout(20) << "EMetaBlob::add_dir_context(" << dir << ") have lump " << dir->dirfrag() << dendl;
+ break;
+ }
+
+ // stop when the dir inode has no projected parent dentry (root/stray)
+ CInode *diri = dir->get_inode();
+ CDentry *parent = diri->get_projected_parent_dn();
+
+ if (!parent)
+ break;
+
+ if (mode == TO_AUTH_SUBTREE_ROOT) { // other modes skip these checks and only stop on an existing lump or a missing parent
+ // subtree root?
+ if (dir->is_subtree_root()) {
+ if (dir->is_auth() && !dir->is_ambiguous_auth()) {
+ // it's an auth subtree, we don't need maybe (if any), and we're done.
+ dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached unambig auth subtree, don't need " << maybe
+ << " at " << *dir << dendl;
+ maybe.clear();
+ break;
+ } else {
+ dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached ambig or !auth subtree, need " << maybe
+ << " at " << *dir << dendl;
+ // we need the maybe list after all! promote it and go back to "definitely" mode
+ parents.splice(parents.begin(), maybe);
+ maybenot = false;
+ }
+ }
+
+ // was the inode journaled in this blob? (my_offset == 0 disables this check)
+ if (my_offset && diri->last_journaled == my_offset) {
+ dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri this blob " << *diri << dendl;
+ break;
+ }
+
+ // have we journaled this inode since the last subtree map? if so, parents from here up are only "maybe" needed
+ if (!maybenot && last_subtree_map && diri->last_journaled >= last_subtree_map) {
+ dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri in this segment ("
+ << diri->last_journaled << " >= " << last_subtree_map << "), setting maybenot flag "
+ << *diri << dendl;
+ maybenot = true;
+ }
+ }
+
+ if (maybenot) {
+ dout(25) << "EMetaBlob::add_dir_context(" << dir << ") maybe " << *parent << dendl;
+ maybe.push_front(parent); // push_front keeps the outermost dentry at the head
+ } else {
+ dout(25) << "EMetaBlob::add_dir_context(" << dir << ") definitely " << *parent << dendl;
+ parents.push_front(parent);
+ }
+
+ dir = parent->get_dir(); // step up to the directory containing the parent dentry
+ }
+
+ parents.splice(parents.begin(), maybe); // any maybe items not cleared above are kept; they sit above parents in the path
+
+ dout(20) << "EMetaBlob::add_dir_context final: " << parents << dendl;
+ for (list<CDentry*>::iterator p = parents.begin(); p != parents.end(); p++) // outermost ancestor first
+ add_dentry(*p, false);
+}
+
void EMetaBlob::update_segment(LogSegment *ls)
{
// atids?