From d099ac0fcecca72eca980c77200f5bc35c987e06 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 26 May 2008 11:56:31 -0700 Subject: [PATCH] mds: remote link/unlink behaving well enough. various bugfixes. --- src/client/SyntheticClient.cc | 22 +++++++++++++++++++ src/mds/CDir.cc | 1 + src/mds/CDir.h | 6 +++-- src/mds/CInode.cc | 3 ++- src/mds/Locker.cc | 16 ++++++++++---- src/mds/Locker.h | 5 ++++- src/mds/MDCache.cc | 15 +++++++++++-- src/mds/Server.cc | 41 ++++++++++++++++++----------------- src/vstartnew.sh | 4 +++- 9 files changed, 82 insertions(+), 31 deletions(-) diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc index 51cccb4f4d10f..1761b306ea0e4 100644 --- a/src/client/SyntheticClient.cc +++ b/src/client/SyntheticClient.cc @@ -2641,6 +2641,28 @@ void SyntheticClient::make_dir_mess(const char *basedir, int n) void SyntheticClient::foo() { + if (1) { + // make 2 parallel dirs, link/unlink between them. + char a[100], b[100]; + client->mkdir("/a", 0755); + client->mkdir("/b", 0755); + for (int i=0; i<10; i++) { + sprintf(a, "/a/%d", i); + client->mknod(a, 0644); + } + while (1) { + for (int i=0; i<10; i++) { + sprintf(a, "/a/%d", i); + sprintf(b, "/b/%d", i); + client->link(a, b); + } + for (int i=0; i<10; i++) { + sprintf(b, "/b/%d", i); + client->unlink(b); + } + } + return; + } if (1) { // bug1.cpp const char *fn = "blah"; diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index cb9842f6708d0..631b4848c7456 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1367,6 +1367,7 @@ void CDir::finish_export(utime_t now) void CDir::decode_import(bufferlist::iterator& blp) { ::decode(fnode, blp); + projected_version = fnode.version; ::decode(committed_version, blp); ::decode(committed_version_equivalent, blp); committing_version = committed_version; diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 3eac0af84331c..386cf50ae4f0a 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -183,8 +183,10 @@ public: version_t pre_dirty(version_t min=0); 
void _mark_dirty(LogSegment *ls); void _set_dirty_flag() { - state_set(STATE_DIRTY); - get(PIN_DIRTY); + if (!state_test(STATE_DIRTY)) { + state_set(STATE_DIRTY); + get(PIN_DIRTY); + } } void mark_dirty(version_t pv, LogSegment *ls); void mark_clean(); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 8122f0634ae18..453b8059cb8c5 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -622,7 +622,8 @@ void CInode::decode_lock_state(int type, bufferlist& bl) << *dir << dendl; fnode_t *pf = dir->get_projected_fnode(); pf->accounted_fragstat = fragstat; - dir->_set_dirty_flag(); // bit of a hack + if (dir->is_auth()) + dir->_set_dirty_flag(); // bit of a hack } } } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index dbb035c93bebb..ed2cebdf5c0cd 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1215,9 +1215,12 @@ void Locker::revoke_client_leases(SimpleLock *lock) void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, CDir *parent, - bool primary_dn, bool do_parent, int linkunlink, + int flags, int linkunlink, EMetaBlob *rollback) { + bool primary_dn = flags & PREDIRTY_PRIMARY; + bool do_parent = flags & PREDIRTY_DIR; + dout(10) << "predirty_nested" << (do_parent ? 
" do_parent_mtime":"") << " linkunlink=" << linkunlink @@ -1238,7 +1241,7 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, list lsi; CInode *cur = in; while (parent) { - assert(cur->is_auth()); + assert(cur->is_auth() || !primary_dn); assert(parent->is_auth()); // opportunistically adjust parent dirfrag @@ -1461,6 +1464,9 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m) { int from = m->get_asker(); + dout(10) << "handle_simple_lock " << *m + << " on " << *lock << " " << *lock->get_parent() << dendl; + if (mds->is_rejoin()) { if (lock->get_parent()->is_rejoining()) { dout(7) << "handle_simple_lock still rejoining " << *lock->get_parent() @@ -1497,9 +1503,11 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m) //|| lock->get_state() == LOCK_GLOCKR); // wait for readers to finish? - if (lock->is_rdlocked()) { - dout(7) << "handle_simple_lock has reader, waiting before ack on " << *lock + if (lock->is_rdlocked() || + lock->get_num_client_lease()) { + dout(7) << "handle_simple_lock has reader|leases, waiting before ack on " << *lock << " on " << *lock->get_parent() << dendl; + revoke_client_leases(lock); lock->set_state(LOCK_GLOCKR); } else { // update lock and reply diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 42772b979e41e..c6a9f2bb0d3d7 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -54,6 +54,9 @@ class ScatterLock; class LocalLock; class MDCache; +static const int PREDIRTY_PRIMARY = 1; // primary dn, adjust nested accounting +static const int PREDIRTY_DIR = 2; // update parent dir mtime/size + class Locker { private: MDS *mds; @@ -159,7 +162,7 @@ protected: public: void predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, CDir *dir, - bool do_nested, bool do_parent, int linkunlink=0, + int flags, int linkunlink=0, EMetaBlob *rollback=0); // local diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 1cd08e9d6aeed..380453c647ac7 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -4500,8 
+4500,14 @@ void MDCache::open_remote_ino_2(inodeno_t ino, return; } - if (!dir && in->is_auth()) + if (!dir && in->is_auth()) { + if (dir->is_frozen_dir()) { + dout(7) << "traverse: " << *dir << " is frozen_dir, waiting" << dendl; + dir->add_waiter(CDir::WAIT_UNFREEZE, _get_waiter(mdr, 0)); + return; + } dir = in->get_or_open_dirfrag(this, frag); + } assert(dir); if (dir->is_auth()) { @@ -4551,7 +4557,6 @@ MDRequest *MDCache::request_start(MClientRequest *req) if (mdr->is_slave()) { dout(10) << "request_start already had " << *mdr << ", cleaning up" << dendl; request_cleanup(mdr); - delete mdr; } else { dout(10) << "request_start already processing " << *mdr << ", dropping new msg" << dendl; delete req; @@ -5215,6 +5220,12 @@ void MDCache::discover_ino(CDir *base, << (want_xlocked ? " want_xlocked":"") << dendl; + if (base->is_ambiguous_auth()) { + dout(10) << " waiting for single auth on " << *base << dendl; + base->add_waiter(CDir::WAIT_SINGLEAUTH, onfinish); + return; + } + if (!base->is_waiting_for_ino(want_ino)) { MDiscover *dis = new MDiscover(mds->get_nodeid(), base->dirfrag(), diff --git a/src/mds/Server.cc b/src/mds/Server.cc index b6c0710127401..854756ecec299 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2048,7 +2048,7 @@ void Server::handle_client_mknod(MDRequest *mdr) le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); - mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, true, true, 1); + mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); @@ -2093,7 +2093,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "mkdir"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); - mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, true, true, 1); 
+ mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); le->metablob.add_dir(newdir, true, true); // dirty AND complete @@ -2145,7 +2145,7 @@ void Server::handle_client_symlink(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "symlink"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); - mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, true, true, 1); + mds->locker->predirty_nested(mdr, &le->metablob, newi, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); // log + wait @@ -2291,8 +2291,8 @@ void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti) // log + wait EUpdate *le = new EUpdate(mdlog, "link_local"); le->metablob.add_client_req(mdr->reqid); - mds->locker->predirty_nested(mdr, &le->metablob, targeti, dn->dir, false, true, 1); // new dn - mds->locker->predirty_nested(mdr, &le->metablob, targeti, 0, true, true); // targeti + mds->locker->predirty_nested(mdr, &le->metablob, targeti, dn->dir, PREDIRTY_DIR, 1); // new dn + mds->locker->predirty_nested(mdr, &le->metablob, targeti, 0, PREDIRTY_PRIMARY); // targeti le->metablob.add_remote_dentry(dn, true, targeti->ino(), MODE_TO_DT(targeti->inode.mode)); // new remote le->metablob.add_primary_dentry(targeti->parent, true, targeti, pi); // update old primary @@ -2370,7 +2370,7 @@ void Server::_link_remote(MDRequest *mdr, CDentry *dn, CInode *targeti) mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "link_remote"); le->metablob.add_client_req(mdr->reqid); - mds->locker->predirty_nested(mdr, &le->metablob, targeti, dn->dir, false, true, 1); + mds->locker->predirty_nested(mdr, &le->metablob, targeti, dn->dir, PREDIRTY_DIR, 1); le->metablob.add_remote_dentry(dn, true, targeti->ino(), MODE_TO_DT(targeti->inode.mode)); // new remote @@ 
-2433,6 +2433,8 @@ void Server::handle_slave_link_prep(MDRequest *mdr) mdr->now = mdr->slave_request->now; + mdr->auth_pin(targeti); + // anchor? if (mdr->slave_request->get_op() == MMDSSlaveRequest::OP_LINKPREP) { if (targeti->is_anchored() && !targeti->is_unanchoring()) { @@ -2469,7 +2471,7 @@ void Server::handle_slave_link_prep(MDRequest *mdr) dout(10) << " projected inode " << pi << " v " << pi->version << dendl; // commit case - mds->locker->predirty_nested(mdr, &le->commit, dn->inode, 0, true, false, 0, &le->rollback); + mds->locker->predirty_nested(mdr, &le->commit, dn->inode, 0, PREDIRTY_PRIMARY, 0, &le->rollback); le->commit.add_primary_dentry(dn, true, targeti, pi); // update old primary le->rollback.add_primary_dentry(dn, true, targeti, oldi); @@ -2502,6 +2504,7 @@ void Server::_logged_slave_link(MDRequest *mdr, CInode *targeti, utime_t old_cti // update the target targeti->pop_and_dirty_projected_inode(mdr->ls); + mdr->apply(); // hit pop mds->balancer->hit_inode(mdr->now, targeti, META_POP_IWR); @@ -2738,18 +2741,17 @@ void Server::_unlink_local(MDRequest *mdr, CDentry *dn, CDentry *straydn) pi->nlink--; pi->ctime = mdr->now; - mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, dn->dir, - dn->is_primary(), true, -1); - if (dn->is_primary()) { // primary link. add stray dentry. assert(straydn); - mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, straydn->dir, true, true, 1); + mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, -1); + mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, straydn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); //le->metablob.add_dir_context(straydn->dir); le->metablob.add_primary_dentry(straydn, true, dn->inode, pi); } else { // remote link. update remote inode. 
- mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, 0, true, true); + mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, dn->dir, PREDIRTY_DIR, -1); + mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, 0, PREDIRTY_PRIMARY); le->metablob.add_primary_dentry(dn->inode->parent, true, dn->inode); } @@ -2859,7 +2861,7 @@ void Server::_unlink_remote(MDRequest *mdr, CDentry *dn) le->metablob.add_client_req(mdr->reqid); // the unlinked dentry - mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, dn->dir, false, true, -1); + mds->locker->predirty_nested(mdr, &le->metablob, dn->inode, dn->dir, PREDIRTY_DIR, -1); dn->pre_dirty(); le->metablob.add_null_dentry(dn, true); @@ -3456,19 +3458,18 @@ void Server::_rename_prepare(MDRequest *mdr, if (mdr->is_master()) { // sub off target if (!linkmerge && destdn->is_primary()) - mds->locker->predirty_nested(mdr, metablob, destdn->inode, destdn->dir, - true, true, -1); + mds->locker->predirty_nested(mdr, metablob, destdn->inode, destdn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, -1); if (destdn->dir == srcdn->dir) { // same dir. don't update nested info or adjust counts. - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, false, true); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, PREDIRTY_DIR); /* flags, not bools: dir mtime/size only, linkunlink stays 0 */ } else { // different dir. update nested accounting. + int flags = srcdn->is_primary() ? 
PREDIRTY_PRIMARY:0; + flags |= PREDIRTY_DIR; if (srcdn->is_auth()) - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, - srcdn->is_primary(), true, -1); - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, destdn->dir, - srcdn->is_primary(), true, 1); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1); + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, destdn->dir, flags, 1); } } @@ -4345,7 +4346,7 @@ void Server::handle_client_openc(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "openc"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(in->ino(), mds->idalloc->get_version()); - mds->locker->predirty_nested(mdr, &le->metablob, in, dn->dir, true, true, 1); + mds->locker->predirty_nested(mdr, &le->metablob, in, dn->dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); le->metablob.add_primary_dentry(dn, true, in, &in->inode); // log + wait diff --git a/src/vstartnew.sh b/src/vstartnew.sh index 793f57eedecfc..208625094d2ad 100755 --- a/src/vstartnew.sh +++ b/src/vstartnew.sh @@ -48,7 +48,9 @@ do done # mds -$CEPH_BIN/cmds $ARGS --debug_ms 1 --debug_mds 20 --mds_thrash_fragments 0 #--debug_ms 20 +$CEPH_BIN/cmds $ARGS --debug_ms 1 --debug_mds 20 --mds_thrash_fragments 0 --mds_thrash_exports 1 #--debug_ms 20 +$CEPH_BIN/cmds $ARGS --debug_ms 1 --debug_mds 20 --mds_thrash_fragments 0 --mds_thrash_exports 1 #--debug_ms 20 +./cmonctl mds set_max_mds 2 echo "started. stop.sh to stop. see out/* (e.g. 'tail -f out/????') for debug output." -- 2.39.5