From b9c6b443b7d7307fc89314b6ad75562bc22b845e Mon Sep 17 00:00:00 2001 From: sageweil Date: Thu, 21 Jun 2007 22:07:16 +0000 Subject: [PATCH] * rename thrash test ran to completion. git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1436 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/TODO | 11 +++ branches/sage/cephmds2/mds/CDir.cc | 8 +- branches/sage/cephmds2/mds/CDir.h | 4 +- branches/sage/cephmds2/mds/Locker.cc | 5 ++ branches/sage/cephmds2/mds/MDCache.cc | 34 ++++--- branches/sage/cephmds2/mds/MDCache.h | 3 +- branches/sage/cephmds2/mds/Server.cc | 89 ++++++++++++++++--- branches/sage/cephmds2/mds/Server.h | 3 + .../sage/cephmds2/messages/MMDSSlaveRequest.h | 2 +- 9 files changed, 126 insertions(+), 33 deletions(-) diff --git a/branches/sage/cephmds2/TODO b/branches/sage/cephmds2/TODO index 0fb779285443a..5fcc36b834551 100644 --- a/branches/sage/cephmds2/TODO +++ b/branches/sage/cephmds2/TODO @@ -50,6 +50,17 @@ sage mds - slave request cleanup on failure - flag request, and discard on re-dispatch? (cuz it'll be waiting on random stuff) +- resolve DISCOVERXLOCK versus rename issue + - witness list may change.. +- revisit wrlocks, dir inode mtime updates. esp in rename. + +- reimplement _local_link/unlink using rename as model. + + +- Q: locker vs migration + - maybe unstable lock states should auth_pin. would simplify migration logic, and probably avoid a number of bugs. basically, the freeze would have to wait for any in-progress lock gathers (not that long!). + - ...but does it play nice with wrlock? and unfortunately auth_pinning wrlocks would kill performance on updates at dir delegation points. 
+ - dirlock-protected mtime updates vs migration, journaling, recovery - rename fun - remote pinning diff --git a/branches/sage/cephmds2/mds/CDir.cc b/branches/sage/cephmds2/mds/CDir.cc index bdab236368227..30ff293773a70 100644 --- a/branches/sage/cephmds2/mds/CDir.cc +++ b/branches/sage/cephmds2/mds/CDir.cc @@ -148,14 +148,14 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) * linking fun */ -CDentry* CDir::add_dentry( const string& dname, inodeno_t ino, bool auth) +CDentry* CDir::add_dentry( const string& dname, inodeno_t ino) { // foreign assert(lookup(dname) == 0); // create dentry CDentry* dn = new CDentry(dname, ino); - if (auth) + if (is_auth()) dn->state_set(CDentry::STATE_AUTH); cache->lru.lru_insert_mid(dn); @@ -180,14 +180,14 @@ CDentry* CDir::add_dentry( const string& dname, inodeno_t ino, bool auth) } -CDentry* CDir::add_dentry( const string& dname, CInode *in, bool auth ) +CDentry* CDir::add_dentry( const string& dname, CInode *in) { // primary assert(lookup(dname) == 0); // create dentry CDentry* dn = new CDentry(dname, in); - if (auth) + if (is_auth()) dn->state_set(CDentry::STATE_AUTH); cache->lru.lru_insert_mid(dn); diff --git a/branches/sage/cephmds2/mds/CDir.h b/branches/sage/cephmds2/mds/CDir.h index 3d89809f68492..4babf35f5b4e7 100644 --- a/branches/sage/cephmds2/mds/CDir.h +++ b/branches/sage/cephmds2/mds/CDir.h @@ -229,8 +229,8 @@ class CDir : public MDSCacheObject { return iter->second; } - CDentry* add_dentry( const string& dname, CInode *in=0, bool auth=true ); - CDentry* add_dentry( const string& dname, inodeno_t ino, bool auth=true ); + CDentry* add_dentry( const string& dname, CInode *in=0 ); + CDentry* add_dentry( const string& dname, inodeno_t ino ); void remove_dentry( CDentry *dn ); // delete dentry void link_inode( CDentry *dn, inodeno_t ino ); void link_inode( CDentry *dn, CInode *in ); diff --git a/branches/sage/cephmds2/mds/Locker.cc b/branches/sage/cephmds2/mds/Locker.cc index d4f2f7f208b73..dbfa28512f0ac 
100644 --- a/branches/sage/cephmds2/mds/Locker.cc +++ b/branches/sage/cephmds2/mds/Locker.cc @@ -136,9 +136,12 @@ bool Locker::acquire_locks(MDRequest *mdr, // make list of items to authpin set mustpin = xlocks; + + /* don't auth_pin wrlocks.. they're a moving target! (might import while an update is in progress) for (set::iterator p = wrlocks.begin(); p != wrlocks.end(); ++p) if ((*p)->get_parent()->is_auth()) mustpin.insert(*p); + */ map > mustpin_remote; // mds -> (object set) @@ -158,6 +161,7 @@ bool Locker::acquire_locks(MDRequest *mdr, if (!object->is_auth()) { if (object->is_ambiguous_auth()) { // wait + dout(10) << " ambiguous auth, waiting to authpin " << *object << endl; object->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_MDS_RetryRequest(mdcache, mdr)); mdcache->request_drop_locks(mdr); mdr->drop_local_auth_pins(); @@ -168,6 +172,7 @@ bool Locker::acquire_locks(MDRequest *mdr, } if (!object->can_auth_pin()) { // wait + dout(10) << " can't auth_pin (freezing?), waiting to authpin " << *object << endl; object->add_waiter(MDSCacheObject::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr)); mdcache->request_drop_locks(mdr); mdr->drop_local_auth_pins(); diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index c0b36a2bcb185..f146ddd37f8b9 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -3473,11 +3473,13 @@ void MDCache::request_finish(MDRequest *mdr) void MDCache::request_forward(MDRequest *mdr, int who, int port) { if (!port) port = MDS_PORT_SERVER; - dout(7) << "request_forward " << *mdr << " to mds" << who << " req " << *mdr << endl; - - mds->forward_message_mds(mdr->client_request, who, port); + + // first clean up (notably, finish any slave requests), request_cleanup(mdr); + + // _then_ forward. 
+ mds->forward_message_mds(mdr->client_request, who, port); if (mds->logger) mds->logger->inc("fw"); } @@ -3495,14 +3497,20 @@ void MDCache::dispatch_request(MDRequest *mdr) -void MDCache::request_forget_foreign_locks(set& s) +void MDCache::request_forget_foreign_locks(MDRequest *mdr) { - set::iterator p = s.begin(); - while (p != s.end()) { + // xlocks + set::iterator p = mdr->xlocks.begin(); + while (p != mdr->xlocks.end()) { if ((*p)->get_parent()->is_auth()) p++; - else - s.erase(p++); + else { + dout(10) << "request_forget_foreign_locks " << **p + << " on " << *(*p)->get_parent() << endl; + (*p)->put_xlock(); + mdr->locks.erase(*p); + mdr->xlocks.erase(p++); + } } } @@ -3539,10 +3547,8 @@ void MDCache::request_cleanup(MDRequest *mdr) MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_FINISH); mds->send_message_mds(r, *p, MDS_PORT_SERVER); } - // strip foreign locks out of lock lists, since the above drops them implicitly. - request_forget_foreign_locks(mdr->xlocks); - request_forget_foreign_locks(mdr->wrlocks); - request_forget_foreign_locks(mdr->rdlocks); + // strip foreign xlocks out of lock lists, since the OP_FINISH drops them implicitly. 
+ request_forget_foreign_locks(mdr); // drop locks @@ -4314,7 +4320,7 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) dout(7) << "had " << *dn << endl; m->get_dentry(i).update_dentry(dn); } else { - dn = curdir->add_dentry( m->get_dentry(i).get_dname(), 0, false ); + dn = curdir->add_dentry( m->get_dentry(i).get_dname(), 0 ); m->get_dentry(i).update_new_dentry(dn); dout(7) << "added " << *dn << endl; } @@ -4615,7 +4621,7 @@ void MDCache::handle_dentry_unlink(MDentryUnlink *m) if (!finished.empty()) mds->queue_waiters(finished); // dentry - straydn = dir->add_dentry( m->straydn->get_dname(), 0, false ); + straydn = dir->add_dentry( m->straydn->get_dname(), 0 ); m->straydn->update_new_dentry(straydn); } diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index c481d5df4e789..4bd0df8054a54 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -112,6 +112,7 @@ struct MDRequest { map pvmap; bufferlist inode_import; version_t inode_import_v; + CDentry *srcdn; // srcdn, if auth, on slave Context *slave_commit; @@ -275,7 +276,7 @@ public: void request_finish(MDRequest *mdr); void request_forward(MDRequest *mdr, int mds, int port=0); void dispatch_request(MDRequest *mdr); - void request_forget_foreign_locks(set& s); + void request_forget_foreign_locks(MDRequest *mdr); void request_drop_locks(MDRequest *mdr); void request_cleanup(MDRequest *r); diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index aa9f4444967dd..35cc97f42609b 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -516,7 +516,6 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) // reply? if (m->is_reply()) { - // yay! 
switch (m->get_op()) { case MMDSSlaveRequest::OP_XLOCKACK: @@ -548,6 +547,13 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) } break; + case MMDSSlaveRequest::OP_RENAMEGETINODEACK: + { + MDRequest *mdr = mdcache->request_get(m->get_reqid()); + handle_slave_rename_get_inode_ack(mdr, m); + } + break; + default: assert(0); } @@ -638,6 +644,10 @@ void Server::dispatch_slave_request(MDRequest *mdr) handle_slave_rename_prep(mdr); break; + case MMDSSlaveRequest::OP_RENAMEGETINODE: + handle_slave_rename_get_inode(mdr); + break; + case MMDSSlaveRequest::OP_FINISH: // slave finisher? if (mdr->slave_commit) { @@ -2468,7 +2478,10 @@ void Server::handle_client_rename(MDRequest *mdr) // -- prepare witnesses -- set witnesses = mdr->extra_witnesses; - srcdn->list_replicas(witnesses); + if (srcdn->is_auth()) + srcdn->list_replicas(witnesses); + else + witnesses.insert(srcdn->authority().first); destdn->list_replicas(witnesses); for (set::iterator p = witnesses.begin(); @@ -2488,13 +2501,28 @@ void Server::handle_client_rename(MDRequest *mdr) } } + // -- inode migration? -- + if (!srcdn->is_auth() && + srcdn->is_primary()) { + if (mdr->inode_import.length() == 0) { + // get inode + int auth = srcdn->authority().first; + dout(10) << " requesting inode export from srcdn auth mds" << auth << endl; + MMDSSlaveRequest *req = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_RENAMEGETINODE); + srcdn->make_path(req->srcdnpath); + mds->send_message_mds(req, auth, MDS_PORT_SERVER); + mdr->waiting_on_remote_witness = auth; // FIXME hrm. + return; + } + dout(10) << " already (just!) 
got inode export from srcdn auth" << endl; + } + // -- prepare journal entry -- EUpdate *le = new EUpdate("rename"); le->metablob.add_client_req(mdr->reqid); CDentry *straydn = _rename_prepare(mdr, &le->metablob, srcdn, destdn); - // -- prepare anchor updates -- C_MDS_rename_anchor *anchorfin = 0; C_Gather *anchorgather = 0; @@ -2532,6 +2560,9 @@ void Server::handle_client_rename(MDRequest *mdr) // -- commit locally -- C_MDS_rename_finish *fin = new C_MDS_rename_finish(mds, mdr, srcdn, destdn, straydn); + // and apply! + _rename_apply(mdr, srcdn, destdn, straydn); + journal_opens(); // journal pending opens, just in case if (anchorfin) { @@ -2565,7 +2596,7 @@ void Server::_rename_finish(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDe dout(10) << "_rename_finish " << *mdr << endl; // apply - _rename_apply(mdr, srcdn, destdn, straydn); + //_rename_apply(mdr, srcdn, destdn, straydn); // commit anchor updates? if (atid1) mds->anchorclient->commit(atid1); @@ -2775,8 +2806,9 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen if (srcdn->is_auth()) srcdn->mark_dirty(mdr->pvmap[srcdn]); - // import srcdn inode? - if (mdr->inode_import.length()) { + // srcdn inode import? + if (!srcdn->is_auth() && destdn->is_auth()) { + assert(mdr->inode_import.length() > 0); int off = 0; mdcache->migrator->decode_import_inode(destdn, mdr->inode_import, off, srcdn->authority().first); @@ -2889,13 +2921,11 @@ void Server::_logged_slave_rename(MDRequest *mdr, // ack MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_RENAMEPREPACK); if (srcdn->is_auth()) { + // share the replica list, so that they can all witness the rename. 
srcdn->list_replicas(reply->srcdn_replicas); - if (srcdn->is_primary()) { - dout(10) << " including inode export info" << endl; - mdcache->migrator->encode_export_inode(srcdn->inode, reply->inode_export, mdr->slave_to_mds); - reply->inode_export_v = srcdn->inode->inode.version; - } + // note srcdn, we'll get asked for inode momentarily + mdr->srcdn = srcdn; } mds->send_message_mds(reply, mdr->slave_to_mds, MDS_PORT_SERVER); @@ -2954,6 +2984,43 @@ void Server::handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m) +void Server::handle_slave_rename_get_inode(MDRequest *mdr) +{ + dout(10) << "handle_slave_rename_get_inode " << *mdr << endl; + + assert(mdr->srcdn); + assert(mdr->srcdn->is_auth()); + assert(mdr->srcdn->is_primary()); + + // reply + MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_RENAMEGETINODEACK); + dout(10) << " replying with inode export info" << endl; + mdcache->migrator->encode_export_inode(mdr->srcdn->inode, reply->inode_export, mdr->slave_to_mds); + reply->inode_export_v = mdr->srcdn->inode->inode.version; + + mdr->inode_import = reply->inode_export; // keep a copy locally, in case we have to rollback + + mds->send_message_mds(reply, mdr->slave_to_mds, MDS_PORT_SERVER); + + // clean up. + delete mdr->slave_request; + mdr->slave_request = 0; +} + +void Server::handle_slave_rename_get_inode_ack(MDRequest *mdr, MMDSSlaveRequest *m) +{ + dout(10) << "handle_slave_rename_get_inode_ack " << *mdr + << " " << *m << endl; + + assert(m->inode_export.length()); + dout(10) << " got inode export, saving in " << *mdr << endl; + mdr->inode_import.claim(m->inode_export); + mdr->inode_import_v = m->inode_export_v; + mdr->waiting_on_remote_witness = -1; // FIXME hrm. + + dispatch_client_request(mdr); // go again! 
+} + diff --git a/branches/sage/cephmds2/mds/Server.h b/branches/sage/cephmds2/mds/Server.h index 1938f2dfc5fe3..55ab1ffef736f 100644 --- a/branches/sage/cephmds2/mds/Server.h +++ b/branches/sage/cephmds2/mds/Server.h @@ -152,6 +152,9 @@ public: void handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m); void _logged_slave_rename(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); void _commit_slave_rename(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); + void handle_slave_rename_get_inode(MDRequest *mdr); + void handle_slave_rename_get_inode_ack(MDRequest *mdr, MMDSSlaveRequest *m); + }; diff --git a/branches/sage/cephmds2/messages/MMDSSlaveRequest.h b/branches/sage/cephmds2/messages/MMDSSlaveRequest.h index f13c5387cd5e2..1f55661b7e079 100644 --- a/branches/sage/cephmds2/messages/MMDSSlaveRequest.h +++ b/branches/sage/cephmds2/messages/MMDSSlaveRequest.h @@ -47,7 +47,7 @@ class MMDSSlaveRequest : public Message { case OP_RENAMEPREP: return "rename_prep"; case OP_RENAMEPREPACK: return "rename_prep_ack"; case OP_RENAMEGETINODE: return "rename_get_inode"; - case OP_RENAMEGOTINODE: return "rename_got_inode"; + case OP_RENAMEGETINODEACK: return "rename_get_inode_ack"; case OP_FINISH: return "finish"; // commit default: assert(0); return 0; -- 2.39.5