From b719c69e6fe2d57fe6620109cb275c2c72229ea4 Mon Sep 17 00:00:00 2001 From: sageweil Date: Wed, 20 Jun 2007 00:54:31 +0000 Subject: [PATCH] * rename commits on slaves git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1431 29311d96-e01e-0410-9327-a35deaab8ce9 --- .../sage/cephmds2/client/SyntheticClient.cc | 2 +- branches/sage/cephmds2/mds/MDCache.cc | 35 ++++-- branches/sage/cephmds2/mds/MDCache.h | 17 ++- branches/sage/cephmds2/mds/Server.cc | 112 +++++++++--------- branches/sage/cephmds2/mds/Server.h | 3 +- .../sage/cephmds2/messages/MMDSSlaveRequest.h | 22 +--- 6 files changed, 96 insertions(+), 95 deletions(-) diff --git a/branches/sage/cephmds2/client/SyntheticClient.cc b/branches/sage/cephmds2/client/SyntheticClient.cc index 429142bcd538d..7cbcf6300eb02 100644 --- a/branches/sage/cephmds2/client/SyntheticClient.cc +++ b/branches/sage/cephmds2/client/SyntheticClient.cc @@ -1360,7 +1360,7 @@ void SyntheticClient::make_dir_mess(const char *basedir, int n) void SyntheticClient::foo() { - if (0) { + if (1) { // rename fun for (int i=0; i<100; i++) { int s = 5; diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index c0319c32d6878..c0b36a2bcb185 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -3495,6 +3495,16 @@ void MDCache::dispatch_request(MDRequest *mdr) +void MDCache::request_forget_foreign_locks(set& s) +{ + set::iterator p = s.begin(); + while (p != s.end()) { + if ((*p)->get_parent()->is_auth()) + p++; + else + s.erase(p++); + } +} void MDCache::request_drop_locks(MDRequest *mdr) { @@ -3505,7 +3515,7 @@ void MDCache::request_drop_locks(MDRequest *mdr) mds->locker->rdlock_finish(*mdr->rdlocks.begin(), mdr); while (!mdr->wrlocks.empty()) mds->locker->wrlock_finish(*mdr->wrlocks.begin(), mdr); - + // make sure ref and trace are empty // if we are doing our own locking, we can't use them! assert(mdr->ref == 0); @@ -3521,6 +3531,20 @@ void MDCache::request_cleanup(MDRequest *mdr) mdr->ref = 0; mdr->trace.clear(); + // clean up slaves + // (will implicitly drop remote dn pins) + for (set::iterator p = mdr->slaves.begin(); + p != mdr->slaves.end(); + ++p) { + MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_FINISH); + mds->send_message_mds(r, *p, MDS_PORT_SERVER); + } + // strip foreign locks out of lock lists, since the above drops them implicitly. + request_forget_foreign_locks(mdr->xlocks); + request_forget_foreign_locks(mdr->wrlocks); + request_forget_foreign_locks(mdr->rdlocks); + + // drop locks request_drop_locks(mdr); @@ -3534,15 +3558,6 @@ void MDCache::request_cleanup(MDRequest *mdr) (*it)->put(MDSCacheObject::PIN_REQUEST); mdr->pins.clear(); - // clean up slaves - // (will implicitly drop remote dn pins) - for (set::iterator p = mdr->slaves.begin(); - p != mdr->slaves.end(); - ++p) { - MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_FINISH); - mds->send_message_mds(r, *p, MDS_PORT_SERVER); - } - // remove from map active_requests.erase(mdr->reqid); delete mdr; diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index 3678d7ecae9d4..cc8f33f5b806c 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -104,13 +104,14 @@ struct MDRequest { int waiting_on_remote_auth_pin; // which mds? - // for rename - set extra_witnesses; // replica list from srcdn auth + // for rename/link/unlink + set extra_witnesses; // replica list from srcdn auth (rename) set witnessed; // nodes who have journaled a RenamePrepare utime_t now; int waiting_on_remote_witness; map pvmap; - + + Context *slave_commit; // --------------------------------------------------- @@ -118,17 +119,20 @@ struct MDRequest { client_request(0), ref(0), slave_request(0), slave_to_mds(-1), waiting_on_remote_auth_pin(-1), - waiting_on_remote_witness(-1) { } + waiting_on_remote_witness(-1), + slave_commit(0) { } MDRequest(metareqid_t ri, MClientRequest *req) : reqid(ri), client_request(req), ref(0), slave_request(0), slave_to_mds(-1), waiting_on_remote_auth_pin(-1), - waiting_on_remote_witness(-1) { } + waiting_on_remote_witness(-1), + slave_commit(0) { } MDRequest(metareqid_t ri, int by) : reqid(ri), client_request(0), ref(0), slave_request(0), slave_to_mds(by), waiting_on_remote_auth_pin(-1), - waiting_on_remote_witness(-1) { } + waiting_on_remote_witness(-1), + slave_commit(0) { } bool is_master() { return slave_to_mds < 0; } bool is_slave() { return slave_to_mds >= 0; } @@ -266,6 +270,7 @@ public: void request_finish(MDRequest *mdr); void request_forward(MDRequest *mdr, int mds, int port=0); void dispatch_request(MDRequest *mdr); + void request_forget_foreign_locks(set& s); void request_drop_locks(MDRequest *mdr); void request_cleanup(MDRequest *r); diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index 01667b165a9c3..b07cc0703a540 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -512,10 +512,12 @@ void Server::dispatch_client_request(MDRequest *mdr) void Server::handle_slave_request(MMDSSlaveRequest *m) { dout(4) << "handle_slave_request " << m->get_reqid() << " from " << m->get_source() << endl; - + int from = m->get_source().num(); + // reply? if (m->is_reply()) { // yay! + switch (m->get_op()) { case MMDSSlaveRequest::OP_XLOCKACK: { @@ -523,6 +525,7 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) SimpleLock *lock = mds->locker->get_lock(m->get_lock_type(), m->get_object_info()); MDRequest *mdr = mdcache->request_get(m->get_reqid()); + mdr->slaves.insert(from); dout(10) << "got remote xlock on " << *lock << " on " << *lock->get_parent() << endl; mdr->xlocks.insert(lock); mdr->locks.insert(lock); @@ -531,38 +534,6 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) } break; - /* - case MMDSSlaveRequest::OP_PINDNACK: - { - if (!mdcache->have_request(m->get_reqid())) - break; // must have finished, without needing this pin. - MDRequest *mdr = mdcache->request_get(m->get_reqid()); - - MDSCacheObjectInfo &info = m->get_object_info(); - CDir *dir = mdcache->get_dirfrag(info.dirfrag); - CDentry *dn = 0; - if (dir) - dn = dir->lookup(info.dname); - if (!dn) { - dout(7) << "hmm don't have dn " << info.dirfrag << " " << info.dname << endl; - break; - } - - // ok! - int from = m->get_source().num(); - dout(7) << "remote pinned on mds" << from << " dn " << *dn << endl; - mdr->remote_dn_pinning[dn].erase(from); - mdr->remote_dn_pins[dn].insert(from); - - // re-dispatch request? - if (mdr->waiting_on_remote_dn_pin) { - mdr->waiting_on_remote_dn_pin = false; - dispatch_client_request(mdr); - } - } - break; - */ - case MMDSSlaveRequest::OP_AUTHPINACK: { MDRequest *mdr = mdcache->request_get(m->get_reqid()); @@ -668,6 +639,14 @@ void Server::dispatch_slave_request(MDRequest *mdr) break; case MMDSSlaveRequest::OP_FINISH: + // slave finisher? + if (mdr->slave_commit) { + mdr->slave_commit->finish(0); + delete mdr->slave_commit; + mdr->slave_commit = 0; + } + + // finish off request. mdcache->request_finish(mdr); break; @@ -787,6 +766,9 @@ void Server::handle_slave_auth_pin_ack(MDRequest *mdr, MMDSSlaveRequest *ack) assert(mdr->waiting_on_remote_auth_pin == from); mdr->waiting_on_remote_auth_pin = -1; + // note slave + mdr->slaves.insert(from); + // go again! dispatch_client_request(mdr); } @@ -2751,15 +2733,6 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen } } else { - // straydn? - if (destdn->is_primary() && !straydn) { - string straydname; - destdn->inode->name_stray_dentry(straydname); - frag_t fg = mdcache->get_stray()->pick_dirfrag(straydname); - CDir *straydir = mdcache->get_stray()->get_dirfrag(fg); - straydn = straydir->lookup(straydname); - } - // unlink destdn? if (!destdn->is_null()) destdn->dir->unlink_inode(destdn); @@ -2816,11 +2789,24 @@ void Server::_rename_apply(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDen class C_MDS_SlaveRenamePrep : public Context { Server *server; MDRequest *mdr; - CDentry *srcdn; + CDentry *srcdn, *destdn, *straydn; public: - C_MDS_SlaveRenamePrep(Server *s, MDRequest *m, CDentry *d) : server(s), mdr(m), srcdn(d) {} + C_MDS_SlaveRenamePrep(Server *s, MDRequest *m, CDentry *sr, CDentry *de, CDentry *st) : + server(s), mdr(m), srcdn(sr), destdn(de), straydn(st) {} void finish(int r) { - server->_logged_slave_rename_prep(mdr, srcdn); + server->_logged_slave_rename(mdr, srcdn, destdn, straydn); + } +}; + +class C_MDS_SlaveRenameCommit : public Context { + Server *server; + MDRequest *mdr; + CDentry *srcdn, *destdn, *straydn; +public: + C_MDS_SlaveRenameCommit(Server *s, MDRequest *m, CDentry *sr, CDentry *de, CDentry *st) : + server(s), mdr(m), srcdn(sr), destdn(de), straydn(st) {} + void finish(int r) { + server->_commit_slave_rename(mdr, srcdn, destdn, straydn); } }; @@ -2858,8 +2844,7 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) dout(10) << " srcdn " << *srcdn << endl; mdr->pin(srcdn); - // open destdn stray? - CDentry *straydn = 0; + // open destdn stray dirfrag? if (destdn->is_primary()) { CInode *dstray = mdcache->get_inode(MDS_INO_STRAY(mdr->slave_to_mds)); if (!dstray) { @@ -2875,23 +2860,22 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) mdcache->open_remote_dir(dstray, fg, new C_MDS_RetryRequest(mdcache, mdr)); return; } - - straydn = straydir->add_dentry(straydname, 0); - dout(10) << " straydn is " << *straydn << endl; + dout(10) << " straydir is " << *straydir << endl; } // journal it - ESlaveUpdate *le = new ESlaveUpdate("rename_prep", mdr->reqid, ESlaveUpdate::OP_PREPARE); + ESlaveUpdate *le = new ESlaveUpdate("slave_rename_prep", mdr->reqid, ESlaveUpdate::OP_PREPARE); mdr->now = mdr->slave_request->now; - _rename_prepare(mdr, &le->metablob, srcdn, destdn); + CDentry *straydn = _rename_prepare(mdr, &le->metablob, srcdn, destdn); - mds->mdlog->submit_entry(le, new C_MDS_SlaveRenamePrep(this, mdr, srcdn)); + mds->mdlog->submit_entry(le, new C_MDS_SlaveRenamePrep(this, mdr, srcdn, destdn, straydn)); } -void Server::_logged_slave_rename_prep(MDRequest *mdr, CDentry *srcdn) +void Server::_logged_slave_rename(MDRequest *mdr, + CDentry *srcdn, CDentry *destdn, CDentry *straydn) { - dout(10) << "_logged_slave_rename_prep " << *mdr << endl; + dout(10) << "_logged_slave_rename " << *mdr << endl; // ack MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_RENAMEPREPACK); @@ -2899,11 +2883,24 @@ void Server::_logged_slave_rename_prep(MDRequest *mdr, CDentry *srcdn) srcdn->list_replicas(reply->srcdn_replicas); mds->send_message_mds(reply, mdr->slave_to_mds, MDS_PORT_SERVER); + // set up commit waiter + mdr->slave_commit = new C_MDS_SlaveRenameCommit(this, mdr, srcdn, destdn, straydn); + // done. delete mdr->slave_request; mdr->slave_request = 0; } +void Server::_commit_slave_rename(MDRequest *mdr, + CDentry *srcdn, CDentry *destdn, CDentry *straydn) +{ + dout(10) << "_commit_slave_rename " << *mdr << endl; + _rename_apply(mdr, srcdn, destdn, straydn); + + // write a commit to the journal + ESlaveUpdate *le = new ESlaveUpdate("slave_rename_commit", mdr->reqid, ESlaveUpdate::OP_COMMIT); + mds->mdlog->submit_entry(le); +} void Server::handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m) { @@ -2911,7 +2908,10 @@ void Server::handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m) << " witnessed by " << m->get_source() << " " << *m << endl; int from = m->get_source().num(); - + + // note slave + mdr->slaves.insert(from); + // witnessed! assert(mdr->witnessed.count(from) == 0); mdr->witnessed.insert(from); diff --git a/branches/sage/cephmds2/mds/Server.h b/branches/sage/cephmds2/mds/Server.h index d881d8d8e2439..1938f2dfc5fe3 100644 --- a/branches/sage/cephmds2/mds/Server.h +++ b/branches/sage/cephmds2/mds/Server.h @@ -150,7 +150,8 @@ public: // slaving void handle_slave_rename_prep(MDRequest *mdr); void handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m); - void _logged_slave_rename_prep(MDRequest *mdr, CDentry *srcdn); + void _logged_slave_rename(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); + void _commit_slave_rename(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); }; diff --git a/branches/sage/cephmds2/messages/MMDSSlaveRequest.h b/branches/sage/cephmds2/messages/MMDSSlaveRequest.h index b2a305130bc7a..db4d2d91d841c 100644 --- a/branches/sage/cephmds2/messages/MMDSSlaveRequest.h +++ b/branches/sage/cephmds2/messages/MMDSSlaveRequest.h @@ -29,10 +29,6 @@ class MMDSSlaveRequest : public Message { static const int OP_AUTHPIN = 3; static const int OP_AUTHPINACK = -3; - static const int OP_PINDN = 5; - static const int OP_PINDNACK = -5; - static const int OP_UNPINDN = 6; - static const int OP_RENAMEPREP = 7; static const int OP_RENAMEPREPACK = -7; @@ -49,11 +45,7 @@ class MMDSSlaveRequest : public Message { case OP_RENAMEPREP: return "rename_prep"; case OP_RENAMEPREPACK: return "rename_prep_ack"; - case OP_PINDN: return "pin_dn"; - case OP_PINDNACK: return "pin_dn_ack"; - case OP_UNPINDN: return "unpin_dn"; - - case OP_FINISH: return "finish"; + case OP_FINISH: return "finish"; // commit default: assert(0); return 0; } } @@ -66,10 +58,6 @@ class MMDSSlaveRequest : public Message { char lock_type; // lock object type MDSCacheObjectInfo object_info; - // for dn pins - inodeno_t dnpathbase; - string dnpath; - // for authpins list authpins; @@ -88,13 +76,9 @@ public: int get_lock_type() { return lock_type; } MDSCacheObjectInfo &get_object_info() { return object_info; } - inodeno_t get_dnpathbase() { return dnpathbase; } - const string& get_dnpath() { return dnpath; } - list& get_authpins() { return authpins; } void set_lock_type(int t) { lock_type = t; } - void set_dnpath(string& p, inodeno_t i) { dnpath = p; dnpathbase = i; } // ---- MMDSSlaveRequest() : Message(MSG_MDS_SLAVE_REQUEST) { } @@ -108,8 +92,6 @@ public: ::_encode(op, payload); ::_encode(lock_type, payload); object_info._encode(payload); - ::_encode(dnpath, payload); - ::_encode(dnpathbase, payload); ::_encode_complex(authpins, payload); ::_encode(srcdnpath, payload); ::_encode(destdnpath, payload); @@ -122,8 +104,6 @@ public: ::_decode(op, payload, off); ::_decode(lock_type, payload, off); object_info._decode(payload, off); - ::_decode(dnpath, payload, off); - ::_decode(dnpathbase, payload, off); ::_decode_complex(authpins, payload, off); ::_decode(srcdnpath, payload, off); ::_decode(destdnpath, payload, off); -- 2.39.5