From: Sage Weil Date: Tue, 27 May 2008 14:24:02 +0000 (-0700) Subject: mds: some remote rename fixes, more to come X-Git-Tag: v0.3~170^2~46 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4c445663f2fa3e8e071cbe9a3bb879c6063f488a;p=ceph.git mds: some remote rename fixes, more to come --- diff --git a/src/TODO b/src/TODO index 6c788357110..e0a9be2cfe7 100644 --- a/src/TODO +++ b/src/TODO @@ -70,6 +70,10 @@ mds nested - fix rejoin vs updated dirfrag nested/dirlocks mds mustfix +- rollback vs accounting is just broken! can we avoid it. + - even with _just_ parent dir mtime, the rollback metablob isn't sufficient during replay.. we need special handling regardless. +- when renaming a dir, how do we know if we need to update the anchor table? + - replay of dir fragmentation (dont want dir frozen, pins, etc.?) - fix rm -r vs mds exports - rename slave in-memory rollback on failure diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 66933365984..4b7cd3efee0 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1232,6 +1232,14 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, parent = in->get_projected_parent_dn()->get_dir(); } + if (flags == 0) { + dout(10) << " no flags, just adding dir context to blob(s)" << dendl; + blob->add_dir_context(parent); + if (rollback) + rollback->add_dir_context(parent); + return; + } + inode_t *curi = in->get_projected_inode(); __s64 drbytes = 1, drfiles = 0, drsubdirs = 0; @@ -1343,7 +1351,7 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, blob->add_dir_context(parent); blob->add_dir(parent, true); if (rollback) - blob->add_dir_context(parent); + rollback->add_dir_context(parent); for (list::iterator p = lsi.begin(); p != lsi.end(); p++) { diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 375571c678c..3507b2e47c8 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3223,7 +3223,7 @@ void Server::handle_client_rename(MDRequest *mdr) } // -- prepare anchor updates -- - if (!linkmerge) { + if (!linkmerge || srcdn->is_primary()) { C_Gather *anchorgather = 0; if (srcdn->is_primary() && srcdn->inode->is_anchored() && @@ -3371,7 +3371,8 @@ version_t Server::_rename_prepare_import(MDRequest *mdr, CDentry *srcdn, bufferl void Server::_rename_prepare(MDRequest *mdr, EMetaBlob *metablob, bufferlist *client_map_bl, - CDentry *srcdn, CDentry *destdn, CDentry *straydn) + CDentry *srcdn, CDentry *destdn, CDentry *straydn, + EMetaBlob *rollback) { dout(10) << "_rename_prepare " << *mdr << " " << *srcdn << " " << *destdn << dendl; if (straydn) dout(10) << " straydn " << *straydn << dendl; @@ -3458,18 +3459,18 @@ void Server::_rename_prepare(MDRequest *mdr, // sub off target int predirty_dir = silent ? 0:PREDIRTY_DIR; if (!linkmerge && destdn->is_primary()) - mds->locker->predirty_nested(mdr, metablob, destdn->inode, destdn->dir, PREDIRTY_PRIMARY|predirty_dir, -1); + mds->locker->predirty_nested(mdr, metablob, destdn->inode, destdn->dir, PREDIRTY_PRIMARY|predirty_dir, -1, + rollback); if (destdn->dir == srcdn->dir) { // same dir. don't update nested info or adjust counts. - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, - false, true); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, predirty_dir, 0, rollback); } else { // different dir. update nested accounting. int flags = srcdn->is_primary() ? PREDIRTY_PRIMARY:0; flags |= predirty_dir; if (srcdn->is_auth()) - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1); - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, destdn->dir, flags, 1); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1, rollback); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, destdn->dir, flags, 1, rollback); } } @@ -3682,8 +3683,10 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) mdr->pin(srcdn->inode); // stray? + bool linkmerge = (srcdn->inode == destdn->inode && + (srcdn->is_primary() || destdn->is_primary())); CDentry *straydn = 0; - if (destdn->is_primary()) { + if (destdn->is_primary() && !linkmerge) { assert(mdr->slave_request->stray.length() > 0); straydn = mdcache->add_replica_stray(mdr->slave_request->stray, destdn->inode, mdr->slave_to_mds); @@ -3746,22 +3749,20 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) mdr->ls = mdlog->get_current_segment(); ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rename_prep", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_PREPARE); + // commit case + bufferlist blah; + _rename_prepare(mdr, &le->commit, &blah, srcdn, destdn, straydn, &le->rollback); + // rollback case if (destdn->inode && destdn->inode->is_auth()) { assert(destdn->is_remote()); - le->rollback.add_dir_context(destdn->dir); le->rollback.add_dentry(destdn, true); } if (srcdn->is_auth() || (srcdn->inode && srcdn->inode->is_auth())) { - le->rollback.add_dir_context(srcdn->dir); le->rollback.add_dentry(srcdn, true); } - // commit case - bufferlist blah; - _rename_prepare(mdr, &le->commit, &blah, srcdn, destdn, straydn); - mdlog->submit_entry(le, new C_MDS_SlaveRenamePrep(this, mdr, srcdn, destdn, straydn)); } else { // don't journal. diff --git a/src/mds/Server.h b/src/mds/Server.h index 12c7b856065..01ebc3b9412 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -169,7 +169,8 @@ public: version_t _rename_prepare_import(MDRequest *mdr, CDentry *srcdn, bufferlist *client_map_bl); void _rename_prepare(MDRequest *mdr, EMetaBlob *metablob, bufferlist *client_map_bl, - CDentry *srcdn, CDentry *destdn, CDentry *straydn); + CDentry *srcdn, CDentry *destdn, CDentry *straydn, + EMetaBlob *rollback=0); void _rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); // slaving diff --git a/src/vstartnew.sh b/src/vstartnew.sh index 6fc0eab805b..73f7b03b845 100755 --- a/src/vstartnew.sh +++ b/src/vstartnew.sh @@ -49,7 +49,7 @@ done # mds $CEPH_BIN/cmds $ARGS --debug_ms 1 --debug_mds 20 --mds_thrash_fragments 0 --mds_thrash_exports 0 #--debug_ms 20 -#$CEPH_BIN/cmds $ARGS --debug_ms 1 --debug_mds 20 --mds_thrash_fragments 0 --mds_thrash_exports 0 #--debug_ms 20 +$CEPH_BIN/cmds $ARGS --debug_ms 1 --debug_mds 20 --mds_thrash_fragments 0 --mds_thrash_exports 0 #--debug_ms 20 ./cmonctl mds set_max_mds 2 echo "started. stop.sh to stop. see out/* (e.g. 'tail -f out/????') for debug output."