From: Sage Weil Date: Thu, 29 May 2008 20:23:24 +0000 (-0700) Subject: mds: do rollback. unlink done, untested. X-Git-Tag: v0.3~170^2~29 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=82199a31c5c93bf54d2cd06c0b737f37ed81003d;p=ceph.git mds: do rollback. unlink done, untested. --- diff --git a/src/include/encoding.h b/src/include/encoding.h index 10aee22210d..56a2f4c6366 100644 --- a/src/include/encoding.h +++ b/src/include/encoding.h @@ -78,6 +78,10 @@ WRITE_INTTYPE_ENCODER(s16, le16) inline void encode(const cl &c, bufferlist &bl) { c.encode(bl); } \ inline void decode(cl &c, bufferlist::iterator &p) { c.decode(p); } +#define WRITE_CLASS_ENCODER_MEMBER(cl) \ + inline void encode(const cl &c, bufferlist &bl) const { c.encode(bl); } \ + inline void decode(cl &c, bufferlist::iterator &p) { c.decode(p); } + // ----------------------------- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index c5c89032fed..41be5b80edc 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1216,16 +1216,17 @@ void Locker::revoke_client_leases(SimpleLock *lock) void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, CDir *parent, - int flags, int linkunlink, - EMetaBlob *rollback) + int flags, int linkunlink) { bool primary_dn = flags & PREDIRTY_PRIMARY; bool do_parent_mtime = flags & PREDIRTY_DIR; + bool shallow = flags & PREDIRTY_SHALLOW; dout(10) << "predirty_nested" << (do_parent_mtime ? " do_parent_mtime":"") << " linkunlink=" << linkunlink << (primary_dn ? " primary_dn":" remote_dn") + << (shallow ? 
" SHALLOW":"") << " " << *in << dendl; if (!parent) { @@ -1236,8 +1237,6 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, if (flags == 0 && linkunlink == 0) { dout(10) << " no flags/linkunlink, just adding dir context to blob(s)" << dendl; blob->add_dir_context(parent); - if (rollback) - rollback->add_dir_context(parent); return; } @@ -1362,18 +1361,12 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, assert(parent->is_auth()); blob->add_dir_context(parent); blob->add_dir(parent, true); - if (rollback) - rollback->add_dir_context(parent); for (list::iterator p = lsi.begin(); p != lsi.end(); p++) { CInode *cur = *p; inode_t *pi = cur->get_projected_inode(); blob->add_primary_dentry(cur->get_projected_parent_dn(), true, 0, pi); - if (rollback) { - inode_t *oldi = cur->get_previous_projected_inode(); - rollback->add_primary_dentry(cur->get_projected_parent_dn(), true, 0, oldi); - } } } diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 6ac71a19cef..8690c6a6aae 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -54,8 +54,10 @@ class ScatterLock; class LocalLock; class MDCache; -static const int PREDIRTY_PRIMARY = 1; // primary dn, adjust nested accounting -static const int PREDIRTY_DIR = 2; // update parent dir mtime/size +// flags for predirty_nested() +static const int PREDIRTY_PRIMARY = 1; // primary dn, adjust nested accounting +static const int PREDIRTY_DIR = 2; // update parent dir mtime/size +static const int PREDIRTY_SHALLOW = 4; // only go to immediate parrent (for easier rollback) class Locker { private: @@ -164,8 +166,7 @@ protected: public: void predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, CDir *dir, - int flags, int linkunlink=0, - EMetaBlob *rollback=0); + int flags, int linkunlink=0); // local protected: diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 1c0431cabde..aab167b3f8c 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1361,8 +1361,10 @@ void MDCache::maybe_resolve_finish() 
} else if (!need_resolve_ack.empty()) { dout(10) << "maybe_resolve_finish still waiting for resolve_ack from (" << need_resolve_ack << ")" << dendl; - } - else { + } + else if (!need_resolve_rollback.empty()) { + dout(10) << "maybe_resolve_finish still waiting for rollback to commit on (" << need_resolve_rollback << ")" << dendl; + } else { dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl; disambiguate_imports(); if (mds->is_resolve()) { @@ -1386,11 +1388,11 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) if (mds->is_resolve()) { // replay assert(uncommitted_slave_updates[from].count(*p)); - uncommitted_slave_updates[from][*p]->commit.replay(mds); + // log commit + mds->mdlog->submit_entry(new ESlaveUpdate(mds->mdlog, "unknown", *p, from, + ESlaveUpdate::OP_COMMIT, uncommitted_slave_updates[from][*p]->origop)); delete uncommitted_slave_updates[from][*p]; uncommitted_slave_updates[from].erase(*p); - // log commit - mds->mdlog->submit_entry(new ESlaveUpdate(mds->mdlog, "unknown", *p, from, ESlaveUpdate::OP_COMMIT)); } else { MDRequest *mdr = request_get(*p); assert(mdr->slave_request == 0); // shouldn't be doing anything! @@ -1405,21 +1407,31 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) if (mds->is_resolve()) { assert(uncommitted_slave_updates[from].count(*p)); - uncommitted_slave_updates[from][*p]->rollback.replay(mds); + + // perform rollback (and journal a rollback entry) + // note: this will hold up the resolve a bit, until the rollback entries journal. 
+ if (uncommitted_slave_updates[from][*p]->origop == ESlaveUpdate::LINK) + mds->server->do_link_rollback(uncommitted_slave_updates[from][*p]->rollback, 0); + else if (uncommitted_slave_updates[from][*p]->origop == ESlaveUpdate::RENAME) + mds->server->do_rename_rollback(uncommitted_slave_updates[from][*p]->rollback, 0); + else + assert(0); + delete uncommitted_slave_updates[from][*p]; uncommitted_slave_updates[from].erase(*p); - mds->mdlog->submit_entry(new ESlaveUpdate(mds->mdlog, "unknown", *p, from, ESlaveUpdate::OP_ROLLBACK)); } else { MDRequest *mdr = request_get(*p); if (mdr->more()->slave_commit) { - mdr->more()->slave_commit->finish(-1); - delete mdr->more()->slave_commit; + Context *fin = mdr->more()->slave_commit; mdr->more()->slave_commit = 0; + fin->finish(-1); + delete fin; + } else { + if (mdr->slave_request) + mdr->aborted = true; + else + request_finish(mdr); } - if (mdr->slave_request) - mdr->aborted = true; - else - request_finish(mdr); } } @@ -4617,9 +4629,11 @@ void MDCache::request_finish(MDRequest *mdr) // slave finisher? if (mdr->more()->slave_commit) { - mdr->more()->slave_commit->finish(0); - delete mdr->more()->slave_commit; + Context *fin = mdr->more()->slave_commit; mdr->more()->slave_commit = 0; + fin->finish(0); // this must re-call request_finish. 
+ delete fin; + return; } if (mdr->client_request && mds->logger) { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index a218618e7b2..901f75bed1e 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -248,6 +248,7 @@ struct MDRequest : public Mutation { // called when slave commits or aborts Context *slave_commit; + bufferlist rollback_bl; More() : src_reanchor_atid(0), dst_reanchor_atid(0), inode_import_v(0), @@ -295,15 +296,15 @@ struct MDRequest : public Mutation { struct MDSlaveUpdate { - EMetaBlob commit; - EMetaBlob rollback; + int origop; + bufferlist rollback; xlist::item xlistitem; Context *waiter; - MDSlaveUpdate() : xlistitem(this), waiter(0) {} - MDSlaveUpdate(EMetaBlob c, EMetaBlob r, xlist &list) : - commit(c), rollback(r), + MDSlaveUpdate(int oo, bufferlist &rbl, xlist &list) : + origop(oo), xlistitem(this), waiter(0) { + rollback.claim(rbl); list.push_back(&xlistitem); } ~MDSlaveUpdate() { @@ -471,6 +472,7 @@ protected: set wants_resolve; // nodes i need to send my resolve to set got_resolve; // nodes i got resolves from set need_resolve_ack; // nodes i need a resolve_ack from + set need_resolve_rollback; // rollbacks i'm writing to the journal void handle_resolve(MMDSResolve *m); void handle_resolve_ack(MMDSResolveAck *m); @@ -478,6 +480,15 @@ protected: void disambiguate_imports(); void recalc_auth_bits(); public: + void add_rollback(metareqid_t reqid) { + need_resolve_rollback.insert(reqid); + } + void finish_rollback(metareqid_t reqid) { + need_resolve_rollback.erase(reqid); + if (need_resolve_rollback.empty()) + maybe_resolve_finish(); + } + // ambiguous imports void add_ambiguous_import(dirfrag_t base, list& bounds); void add_ambiguous_import(CDir *base, const set& bounds); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 0e9fffb06fe..de49b3a94cc 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2349,14 +2349,12 @@ class C_MDS_SlaveLinkPrep : public Context { Server *server; MDRequest *mdr; CInode *targeti; - utime_t 
old_ctime; - bool inc; public: - C_MDS_SlaveLinkPrep(Server *s, MDRequest *r, CInode *t, utime_t oct, bool in) : - server(s), mdr(r), targeti(t), old_ctime(oct), inc(in) { } + C_MDS_SlaveLinkPrep(Server *s, MDRequest *r, CInode *t) : + server(s), mdr(r), targeti(t) { } void finish(int r) { assert(r == 0); - server->_logged_slave_link(mdr, targeti, old_ctime, inc); + server->_logged_slave_link(mdr, targeti); } }; @@ -2391,9 +2389,9 @@ void Server::handle_slave_link_prep(MDRequest *mdr) // journal it mdr->ls = mdlog->get_current_segment(); - ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_prep", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_PREPARE); + ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_prep", mdr->reqid, mdr->slave_to_mds, + ESlaveUpdate::OP_PREPARE, ESlaveUpdate::LINK); - inode_t *oldi = dn->inode->get_projected_inode(); inode_t *pi = dn->inode->project_inode(); // update journaled target inode @@ -2405,44 +2403,45 @@ void Server::handle_slave_link_prep(MDRequest *mdr) inc = false; pi->nlink--; } - utime_t old_ctime = pi->ctime; + + link_rollback rollback; + rollback.reqid = mdr->reqid; + rollback.ino = targeti->ino(); + rollback.old_ctime = targeti->inode.ctime; // we hold versionlock; no concurrent projections + rollback.old_dir_rctime = targeti->get_parent_dn()->get_dir()->get_projected_fnode()->fragstat.rctime; + rollback.was_inc = inc; + ::encode(rollback, le->rollback); + mdr->more()->rollback_bl = le->rollback; + pi->ctime = mdr->now; pi->version = targeti->pre_dirty(); dout(10) << " projected inode " << pi << " v " << pi->version << dendl; // commit case - mds->locker->predirty_nested(mdr, &le->commit, dn->inode, 0, PREDIRTY_PRIMARY, 0, &le->rollback); + mds->locker->predirty_nested(mdr, &le->commit, dn->inode, 0, PREDIRTY_SHALLOW|PREDIRTY_PRIMARY, 0); le->commit.add_primary_dentry(dn, true, targeti, pi); // update old primary - le->rollback.add_primary_dentry(dn, true, targeti, oldi); - mdlog->submit_entry(le, new 
C_MDS_SlaveLinkPrep(this, mdr, targeti, old_ctime, inc)); + mdlog->submit_entry(le, new C_MDS_SlaveLinkPrep(this, mdr, targeti)); } class C_MDS_SlaveLinkCommit : public Context { Server *server; MDRequest *mdr; CInode *targeti; - utime_t old_ctime; - version_t old_version; - bool inc; public: - C_MDS_SlaveLinkCommit(Server *s, MDRequest *r, CInode *t, utime_t oct, version_t ov, bool in) : - server(s), mdr(r), targeti(t), old_ctime(oct), old_version(ov), inc(in) { } + C_MDS_SlaveLinkCommit(Server *s, MDRequest *r, CInode *t) : + server(s), mdr(r), targeti(t) { } void finish(int r) { - server->_commit_slave_link(mdr, r, targeti, - old_ctime, old_version, inc); + server->_commit_slave_link(mdr, r, targeti); } }; -void Server::_logged_slave_link(MDRequest *mdr, CInode *targeti, utime_t old_ctime, bool inc) +void Server::_logged_slave_link(MDRequest *mdr, CInode *targeti) { dout(10) << "_logged_slave_link " << *mdr - << " inc=" << inc << " " << *targeti << dendl; - version_t old_version = targeti->inode.version; - // update the target targeti->pop_and_dirty_projected_inode(mdr->ls); mdr->apply(); @@ -2455,7 +2454,7 @@ void Server::_logged_slave_link(MDRequest *mdr, CInode *targeti, utime_t old_cti mds->send_message_mds(reply, mdr->slave_to_mds); // set up commit waiter - mdr->more()->slave_commit = new C_MDS_SlaveLinkCommit(this, mdr, targeti, old_ctime, old_version, inc); + mdr->more()->slave_commit = new C_MDS_SlaveLinkCommit(this, mdr, targeti); // done. 
delete mdr->slave_request; @@ -2463,36 +2462,90 @@ void Server::_logged_slave_link(MDRequest *mdr, CInode *targeti, utime_t old_cti } -void Server::_commit_slave_link(MDRequest *mdr, int r, CInode *targeti, - utime_t old_ctime, version_t old_version, bool inc) +void Server::_commit_slave_link(MDRequest *mdr, int r, CInode *targeti) { dout(10) << "_commit_slave_link " << *mdr << " r=" << r - << " inc=" << inc << " " << *targeti << dendl; - ESlaveUpdate *le; if (r == 0) { // write a commit to the journal - le = new ESlaveUpdate(mdlog, "slave_link_commit", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_COMMIT); + ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_commit", mdr->reqid, mdr->slave_to_mds, + ESlaveUpdate::OP_COMMIT, ESlaveUpdate::LINK); + mdlog->submit_entry(le); + mds->mdcache->request_finish(mdr); } else { - le = new ESlaveUpdate(mdlog, "slave_link_rollback", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_ROLLBACK); + do_link_rollback(mdr->more()->rollback_bl, mdr); + } +} - // -- rollback in memory -- - assert(targeti->inode.ctime == mdr->now); - assert(targeti->projected_inode.empty()); // we're holding the version lock. +struct C_MDS_LoggedLinkRollback : public Context { + Server *server; + Mutation *mut; + MDRequest *mdr; + C_MDS_LoggedLinkRollback(Server *s, Mutation *m, MDRequest *r) : server(s), mut(m), mdr(r) {} + void finish(int r) { + server->_do_link_rollback_finish(mut, mdr); + } +}; - targeti->inode.ctime = old_ctime; - targeti->inode.version = old_version; - if (inc) - targeti->inode.nlink++; - else - targeti->inode.nlink--; +void Server::do_link_rollback(bufferlist &rbl, MDRequest *mdr) +{ + link_rollback rollback; + bufferlist::iterator p = rbl.begin(); + ::decode(rollback, p); - // FIXME rctime etc.? - } + CInode *in = mds->mdcache->get_inode(rollback.ino); + assert(in); + dout(10) << "do_link_rollback of " << (rollback.was_inc ? 
"inc":"dec") << " on " << *in << dendl; + assert(!in->is_projected()); // live slave request hold versionlock. + + Mutation *mut = mdr; + if (!mut) { + assert(mds->is_resolve()); + mds->mdcache->add_rollback(rollback.reqid); // need to finish this update before resolve finishes + mut = new Mutation(rollback.reqid); + } + + inode_t *pi = in->project_inode(); + pi->version = in->pre_dirty(); + mut->add_projected_inode(in); + + // parent dir rctime + CDir *parent = in->get_parent_dn()->get_dir(); + fnode_t *pf = parent->project_fnode(); + mut->add_projected_fnode(parent); + pf->version = parent->pre_dirty(); + if (pf->fragstat.rctime == pi->ctime) { + pf->fragstat.rctime = rollback.old_dir_rctime; + mut->add_updated_scatterlock(&parent->get_inode()->dirlock); + } + + // inode + pi->ctime = rollback.old_ctime; + if (rollback.was_inc) + pi->nlink--; + else + pi->nlink++; - mdlog->submit_entry(le); + ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_rollback", rollback.reqid, -1, + ESlaveUpdate::OP_ROLLBACK, ESlaveUpdate::LINK); + le->commit.add_dir_context(parent); + le->commit.add_dir(parent, true); + le->commit.add_primary_dentry(in->get_parent_dn(), true, 0, pi); + + mdlog->submit_entry(le, new C_MDS_LoggedLinkRollback(this, mut, mdr)); +} + +void Server::_do_link_rollback_finish(Mutation *mut, MDRequest *mdr) +{ + dout(10) << "_do_link_rollback_finish" << dendl; + mut->apply(); + if (mdr) + mds->mdcache->request_finish(mdr); + else { + mds->mdcache->finish_rollback(mut->reqid); + } } @@ -3329,8 +3382,7 @@ version_t Server::_rename_prepare_import(MDRequest *mdr, CDentry *srcdn, bufferl void Server::_rename_prepare(MDRequest *mdr, EMetaBlob *metablob, bufferlist *client_map_bl, - CDentry *srcdn, CDentry *destdn, CDentry *straydn, - EMetaBlob *rollback) + CDentry *srcdn, CDentry *destdn, CDentry *straydn) { dout(10) << "_rename_prepare " << *mdr << " " << *srcdn << " " << *destdn << dendl; if (straydn) dout(10) << " straydn " << *straydn << dendl; @@ -3418,16 
+3470,15 @@ void Server::_rename_prepare(MDRequest *mdr, // sub off target if (destdn->is_auth() && !destdn->is_null()) mds->locker->predirty_nested(mdr, metablob, destdn->inode, destdn->dir, - (destdn->is_primary() ? PREDIRTY_PRIMARY:0)|predirty_dir, -1, - rollback); + (destdn->is_primary() ? PREDIRTY_PRIMARY:0)|predirty_dir, -1); // move srcdn int predirty_primary = (srcdn->is_primary() && srcdn->dir != destdn->dir) ? PREDIRTY_PRIMARY:0; int flags = predirty_dir | predirty_primary; if (srcdn->is_auth()) - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1, rollback); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1); if (destdn->is_auth()) - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, destdn->dir, flags, 1, rollback); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, destdn->dir, flags, 1); // add it all to the metablob // target inode @@ -3702,21 +3753,40 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) srcdn->inode->is_any_caps()) { // journal. mdr->ls = mdlog->get_current_segment(); - ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rename_prep", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_PREPARE); - - // commit case - bufferlist blah; - _rename_prepare(mdr, &le->commit, &blah, srcdn, destdn, straydn, &le->rollback); + ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rename_prep", mdr->reqid, mdr->slave_to_mds, + ESlaveUpdate::OP_PREPARE, ESlaveUpdate::RENAME); - // rollback case - if (destdn->inode && destdn->inode->is_auth()) { - assert(destdn->is_remote()); - le->rollback.add_dentry(destdn, true); + // encode everything we'd need to roll this back... basically, just the original state. 
+ rename_rollback rollback; + + rollback.orig_src.dirfrag = srcdn->dir->dirfrag(); + rollback.orig_src.dname = srcdn->name; + if (srcdn->is_primary()) + rollback.orig_src.ino = srcdn->inode->ino(); + else { + rollback.orig_src.ino = 0; + rollback.orig_src.remote_ino = srcdn->get_remote_ino(); + rollback.orig_src.remote_d_type = srcdn->get_remote_d_type(); /* d_type goes in its own drec field; writing it to remote_ino would overwrite the ino saved just above */ } - if (srcdn->is_auth() || - (srcdn->inode && srcdn->inode->is_auth())) { - le->rollback.add_dentry(srcdn, true); + + rollback.orig_dest.dirfrag = destdn->dir->dirfrag(); + rollback.orig_dest.dname = destdn->name; + if (destdn->is_primary()) + rollback.orig_dest.ino = destdn->inode->ino(); + else { + rollback.orig_dest.ino = 0; + rollback.orig_dest.remote_ino = destdn->get_remote_ino(); + rollback.orig_dest.remote_d_type = destdn->get_remote_d_type(); /* same as for orig_src: keep remote_ino intact and record the d_type separately */ + } + + if (straydn) { + rollback.stray_dirfrag = straydn->dir->dirfrag(); + rollback.stray_dname = straydn->name; } + ::encode(rollback, le->rollback); + + bufferlist blah; // inode import data... obviously not used if we're the slave + _rename_prepare(mdr, &le->commit, &blah, srcdn, destdn, straydn); mdlog->submit_entry(le, new C_MDS_SlaveRenamePrep(this, mdr, srcdn, destdn, straydn)); } else { @@ -3780,36 +3850,57 @@ void Server::_commit_slave_rename(MDRequest *mdr, int r, { dout(10) << "_commit_slave_rename " << *mdr << " r=" << r << dendl; - // unfreeze+singleauth inode - // hmm, do i really need to delay this? 
- if (srcdn->is_auth() && destdn->is_primary() && - destdn->inode->state_test(CInode::STATE_AMBIGUOUSAUTH)) { - dout(10) << " unfreezing exported inode " << *destdn->inode << dendl; - list finished; - - // singleauth - assert(destdn->inode->state_test(CInode::STATE_AMBIGUOUSAUTH)); - destdn->inode->state_clear(CInode::STATE_AMBIGUOUSAUTH); - destdn->inode->take_waiting(CInode::WAIT_SINGLEAUTH, finished); - - // unfreeze - assert(destdn->inode->is_frozen_inode() || - destdn->inode->is_freezing_inode()); - destdn->inode->unfreeze_inode(finished); - - mds->queue_waiters(finished); - } - - ESlaveUpdate *le; if (r == 0) { // write a commit to the journal - le = new ESlaveUpdate(mdlog, "slave_rename_commit", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_COMMIT); + le = new ESlaveUpdate(mdlog, "slave_rename_commit", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_COMMIT, ESlaveUpdate::RENAME); + + // unfreeze+singleauth inode + // hmm, do i really need to delay this? + if (srcdn->is_auth() && destdn->is_primary() && + destdn->inode->state_test(CInode::STATE_AMBIGUOUSAUTH)) { + dout(10) << " unfreezing exported inode " << *destdn->inode << dendl; + list finished; + + // singleauth + assert(destdn->inode->state_test(CInode::STATE_AMBIGUOUSAUTH)); + destdn->inode->state_clear(CInode::STATE_AMBIGUOUSAUTH); + destdn->inode->take_waiting(CInode::WAIT_SINGLEAUTH, finished); + + // unfreeze + assert(destdn->inode->is_frozen_inode() || + destdn->inode->is_freezing_inode()); + destdn->inode->unfreeze_inode(finished); + + mds->queue_waiters(finished); + } + + mdlog->submit_entry(le); } else { // abort - le = new ESlaveUpdate(mdlog, "slave_rename_abort", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_ROLLBACK); + do_rename_rollback(mdr->more()->rollback_bl, mdr); + + // rollback export. readjust subtree map, if it was a dir. 
+ assert(0); // write me + + + } + + mds->mdcache->request_finish(mdr); +} + +void Server::do_rename_rollback(bufferlist &rbl, MDRequest *mdr) +{ + rename_rollback rollback; + bufferlist::iterator p = rbl.begin(); + ::decode(rollback, p); + + ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rename_abort", rollback.reqid, -1, + ESlaveUpdate::OP_ROLLBACK, ESlaveUpdate::RENAME); + + /* // -- rollback in memory -- if (mdr->more()->was_link_merge) { @@ -3843,7 +3934,8 @@ void Server::_commit_slave_rename(MDRequest *mdr, int r, mdr->more()->destdn_was_remote_inode->inode.nlink++; } else if (straydn && straydn->inode) { CInode *in = straydn->inode; - straydn->dir->unlink_inode(straydn); + strayd mdlog->submit_entry(le); +n->dir->unlink_inode(straydn); destdn->dir->link_primary_inode(destdn, in); straydn->dir->remove_dentry(straydn); } @@ -3858,12 +3950,10 @@ void Server::_commit_slave_rename(MDRequest *mdr, int r, // *** WRITE ME *** assert(0); + */ + assert(0); - } - - - - mdlog->submit_entry(le); + //mdlog->submit_entry(le, new ...); } void Server::handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *ack) diff --git a/src/mds/Server.h b/src/mds/Server.h index 858c7bb7c12..cd9d2c484ab 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -27,6 +27,9 @@ class EUpdate; class PVList; class MMDSSlaveRequest; + + + class Server { MDS *mds; MDCache *mdcache; @@ -137,10 +140,11 @@ public: version_t); void handle_slave_link_prep(MDRequest *mdr); - void _logged_slave_link(MDRequest *mdr, CInode *targeti, utime_t old_ctime, bool inc); - void _commit_slave_link(MDRequest *mdr, int r, CInode *targeti, - utime_t old_ctime, version_t old_version, bool inc); + void _logged_slave_link(MDRequest *mdr, CInode *targeti); + void _commit_slave_link(MDRequest *mdr, int r, CInode *targeti); void handle_slave_link_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m); + void do_link_rollback(bufferlist &rbl, MDRequest *mdr); + void _do_link_rollback_finish(Mutation *mut, MDRequest *mdr); 
// unlink void handle_client_unlink(MDRequest *mdr); @@ -166,8 +170,7 @@ public: version_t _rename_prepare_import(MDRequest *mdr, CDentry *srcdn, bufferlist *client_map_bl); void _rename_prepare(MDRequest *mdr, EMetaBlob *metablob, bufferlist *client_map_bl, - CDentry *srcdn, CDentry *destdn, CDentry *straydn, - EMetaBlob *rollback=0); + CDentry *srcdn, CDentry *destdn, CDentry *straydn); void _rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); // slaving @@ -175,6 +178,7 @@ public: void handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m); void _logged_slave_rename(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); void _commit_slave_rename(MDRequest *mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn); + void do_rename_rollback(bufferlist &rbl, MDRequest *mdr); }; diff --git a/src/mds/events/ESlaveUpdate.h b/src/mds/events/ESlaveUpdate.h index d25b17a43a1..c854fa3e8b8 100644 --- a/src/mds/events/ESlaveUpdate.h +++ b/src/mds/events/ESlaveUpdate.h @@ -18,12 +18,94 @@ #include "../LogEvent.h" #include "EMetaBlob.h" +/* + * rollback records, for remote/slave updates, which may need to be manually + * rolled back during journal replay. (or while active if master fails, but in + * that case these records aren't needed.) 
+ */ +struct link_rollback { + metareqid_t reqid; + inodeno_t ino; + bool was_inc; + utime_t old_ctime; + utime_t old_dir_rctime; + + void encode(bufferlist &bl) const { + ::encode(reqid, bl); + ::encode(ino, bl); + ::encode(was_inc, bl); + ::encode(old_ctime, bl); + ::encode(old_dir_rctime, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(reqid, bl); + ::decode(ino, bl); + ::decode(was_inc, bl); + ::decode(old_ctime, bl); + ::decode(old_dir_rctime, bl); + } +}; +WRITE_CLASS_ENCODER(link_rollback) + +struct rename_rollback { + struct drec { + dirfrag_t dirfrag; + dirfrag_t dirfrag_mtime; + inodeno_t ino, remote_ino; + string dname; + char remote_d_type; + utime_t ctime; + + void encode(bufferlist &bl) const { + ::encode(dirfrag, bl); + ::encode(dirfrag_mtime, bl); + ::encode(ino, bl); + ::encode(remote_ino, bl); + ::encode(dname, bl); + ::encode(remote_d_type, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(dirfrag, bl); + ::decode(dirfrag_mtime, bl); + ::decode(ino, bl); + ::decode(remote_ino, bl); + ::decode(dname, bl); + ::decode(remote_d_type, bl); + } + }; + WRITE_CLASS_ENCODER_MEMBER(drec) + + metareqid_t reqid; + drec orig_src, orig_dest; + dirfrag_t stray_dirfrag; + string stray_dname; + + void encode(bufferlist &bl) const { + ::encode(reqid, bl); + encode(orig_src, bl); + encode(orig_dest, bl); + ::encode(stray_dirfrag, bl); + ::encode(stray_dname, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(reqid, bl); + decode(orig_src, bl); + decode(orig_dest, bl); + ::decode(stray_dirfrag, bl); + ::decode(stray_dname, bl); + } +}; +WRITE_CLASS_ENCODER(rename_rollback) + + class ESlaveUpdate : public LogEvent { public: const static int OP_PREPARE = 1; const static int OP_COMMIT = 2; const static int OP_ROLLBACK = 3; + const static int LINK = 1; + const static int RENAME = 2; /* * we journal a rollback metablob that contains the unmodified metadata * too, because we may be updating previously dirty metadata, which @@ -31,27 
+113,31 @@ public: * those updates could be lost.. so we re-journal the unmodified metadata, * and replay will apply _either_ commit or rollback. */ - EMetaBlob commit, rollback; + EMetaBlob commit; + bufferlist rollback; string type; metareqid_t reqid; __s32 master; - __u32 op; // prepare, commit, abort + __u8 op; // prepare, commit, abort + __u8 origop; // link | rename ESlaveUpdate() : LogEvent(EVENT_SLAVEUPDATE) { } - ESlaveUpdate(MDLog *mdlog, const char *s, metareqid_t ri, int mastermds, int o) : - LogEvent(EVENT_SLAVEUPDATE), commit(mdlog), rollback(mdlog), + ESlaveUpdate(MDLog *mdlog, const char *s, metareqid_t ri, int mastermds, int o, int oo) : + LogEvent(EVENT_SLAVEUPDATE), commit(mdlog), type(s), reqid(ri), master(mastermds), - op(o) { } + op(o), origop(oo) { } void print(ostream& out) { if (type.length()) out << type << " "; out << " " << op; + if (origop == LINK) out << " link"; + if (origop == RENAME) out << " rename"; out << " " << reqid; out << " for mds" << master; - out << commit << " " << rollback; + out << commit; } void encode(bufferlist &bl) const { @@ -59,6 +145,7 @@ public: ::encode(reqid, bl); ::encode(master, bl); ::encode(op, bl); + ::encode(origop, bl); ::encode(commit, bl); ::encode(rollback, bl); } @@ -67,6 +154,7 @@ public: ::decode(reqid, bl); ::decode(master, bl); ::decode(op, bl); + ::decode(origop, bl); ::decode(commit, bl); ::decode(rollback, bl); } diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 23cabaf7566..3c923c3bae2 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -629,40 +629,36 @@ void ESlaveUpdate::replay(MDS *mds) { switch (op) { case ESlaveUpdate::OP_PREPARE: - // FIXME: horribly inefficient copy; EMetaBlob needs a swap() or something dout(10) << "ESlaveUpdate.replay prepare " << reqid << " for mds" << master - << ": saving blobs for later commit" << dendl; + << ": applying commit, saving rollback info" << dendl; assert(mds->mdcache->uncommitted_slave_updates[master].count(reqid) == 0); - 
commit._segment = _segment; // may need this later - rollback._segment = _segment; // may need this later + commit.replay(mds, _segment); mds->mdcache->uncommitted_slave_updates[master][reqid] = - new MDSlaveUpdate(commit, rollback, _segment->slave_updates); + new MDSlaveUpdate(origop, rollback, _segment->slave_updates); break; case ESlaveUpdate::OP_COMMIT: if (mds->mdcache->uncommitted_slave_updates[master].count(reqid)) { - dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds" << master - << ": applying commit blob" << dendl; - mds->mdcache->uncommitted_slave_updates[master][reqid]->commit.replay(mds, _segment); + dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds" << master << dendl; delete mds->mdcache->uncommitted_slave_updates[master][reqid]; mds->mdcache->uncommitted_slave_updates[master].erase(reqid); } else { dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds" << master - << ": ignoring, no previously saved blobs" << dendl; + << ": ignoring, no previously saved prepare" << dendl; } break; case ESlaveUpdate::OP_ROLLBACK: if (mds->mdcache->uncommitted_slave_updates[master].count(reqid)) { dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds" << master - << ": applying rollback blob" << dendl; + << ": applying rollback commit blob" << dendl; assert(mds->mdcache->uncommitted_slave_updates[master].count(reqid)); - mds->mdcache->uncommitted_slave_updates[master][reqid]->rollback.replay(mds, _segment); + commit.replay(mds, _segment); delete mds->mdcache->uncommitted_slave_updates[master][reqid]; mds->mdcache->uncommitted_slave_updates[master].erase(reqid); } else { dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds" << master - << ": ignoring, no previously saved blobs" << dendl; + << ": ignoring, no previously saved prepare" << dendl; } break;