From 62ffc1116ff16cd80a3833aa52063bbceeb68167 Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 16 Mar 2007 00:57:53 +0000 Subject: [PATCH] * some discover cleanup * fixed CDentry state replication (preserves dirty pins) * fixed clientmap versioning vs journaling git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1252 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/TODO | 2 + branches/sage/cephmds2/doc/mds_locks.txt | 22 +++ branches/sage/cephmds2/mds/CDentry.h | 10 +- branches/sage/cephmds2/mds/ClientMap.h | 4 +- branches/sage/cephmds2/mds/MDCache.cc | 150 +++++------------- branches/sage/cephmds2/mds/Server.cc | 9 +- .../sage/cephmds2/messages/MDiscoverReply.h | 6 +- 7 files changed, 80 insertions(+), 123 deletions(-) create mode 100644 branches/sage/cephmds2/doc/mds_locks.txt diff --git a/branches/sage/cephmds2/TODO b/branches/sage/cephmds2/TODO index 79a27ad666c51..d0b07bda951a4 100644 --- a/branches/sage/cephmds2/TODO +++ b/branches/sage/cephmds2/TODO @@ -35,6 +35,8 @@ mds - discover - hard link dentries +- rejoin and replicas that are not in recovered node's cache... fetch storm? + - mds failure vs clients - clean up client op redirection - idempotent ops diff --git a/branches/sage/cephmds2/doc/mds_locks.txt b/branches/sage/cephmds2/doc/mds_locks.txt new file mode 100644 index 0000000000000..1a337c3bc3d90 --- /dev/null +++ b/branches/sage/cephmds2/doc/mds_locks.txt @@ -0,0 +1,22 @@ + + + +path_pin = read lock on /some/random/path + - prevents a dnxlock + +dnxlock = exclusive lock on /some/random/path + - prevents dn read + - on auth + - does auth_pin + +auth_pin = pin to authority, on *dir, *in + - prevents freeze + - can hang up export + - thus blocking other auth_pins + +hard/file_wrlock = exlusive lock on inode content + - prevents inode read + - on auth + - does auth_pin + + diff --git a/branches/sage/cephmds2/mds/CDentry.h b/branches/sage/cephmds2/mds/CDentry.h index e77a33d948211..a0f283b6466d0 100644 --- a/branches/sage/cephmds2/mds/CDentry.h +++ b/branches/sage/cephmds2/mds/CDentry.h @@ -211,8 +211,9 @@ class CDentry : public MDSCacheObject, public LRUObject { mark_clean(); } void decode_import_state(bufferlist& bl, int& off, int from, int to) { - bl.copy(off, sizeof(state), (char*)&state); - off += sizeof(state); + int nstate; + bl.copy(off, sizeof(nstate), (char*)&nstate); + off += sizeof(nstate); bl.copy(off, sizeof(version), (char*)&version); off += sizeof(version); bl.copy(off, sizeof(projected_version), (char*)&projected_version); @@ -223,14 +224,15 @@ class CDentry : public MDSCacheObject, public LRUObject { ::_decode(replicas, bl, off); // twiddle - if (state_test(STATE_DIRTY)) + state = 0; + state_set(CDentry::STATE_AUTH); + if (nstate & STATE_DIRTY) _mark_dirty(); if (!replicas.empty()) get(PIN_REPLICATED); add_replica(from, EXPORT_NONCE); if (is_replica(to)) remove_replica(to); - state_set(CDentry::STATE_AUTH); } // -- locking diff --git a/branches/sage/cephmds2/mds/ClientMap.h b/branches/sage/cephmds2/mds/ClientMap.h index 062803a6d9b13..776aa8898500d 100644 --- a/branches/sage/cephmds2/mds/ClientMap.h +++ b/branches/sage/cephmds2/mds/ClientMap.h @@ -108,11 +108,11 @@ public: void add_open(int client, const entity_inst_t& inst) { inc_ref(client, inst); - version++; + //version++; } void dec_open(int client) { dec_ref(client); - version++; + //version++; } void encode(bufferlist& bl) { diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 24f76cd9c2a30..67335723e43ff 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -2534,19 +2534,20 @@ int MDCache::path_traverse(filepath& origpath, curdir->is_auth() && curdir->is_rep() && curdir->is_replica(req->get_source().num()) && - dn->get_inode()->is_auth() + dn->is_auth() ) { assert(req->get_source().is_mds()); int from = req->get_source().num(); - if (dn->get_inode()->is_replica(from)) { + if (dn->is_replica(from)) { dout(15) << "traverse: REP would replicate to mds" << from << ", but already cached_by " << req->get_source() << " dn " << *dn << endl; } else { dout(10) << "traverse: REP replicating to " << req->get_source() << " dn " << *dn << endl; MDiscoverReply *reply = new MDiscoverReply(curdir->ino()); reply->add_dentry( dn->replicate_to( from ) ); - reply->add_inode( dn->inode->replicate_to( from ) ); + if (dn->is_primary()) + reply->add_inode( dn->inode->replicate_to( from ) ); mds->send_message_mds(reply, req->get_source().num(), MDS_PORT_CACHE); } } @@ -3390,10 +3391,8 @@ void MDCache::handle_discover(MDiscover *dis) } // open dir? - if (!curdir) { - assert(cur->is_auth()); + if (!curdir) curdir = cur->get_or_open_dirfrag(this, fg); - } assert(curdir); assert(curdir->is_auth()); @@ -3413,10 +3412,9 @@ void MDCache::handle_discover(MDiscover *dis) // add dir if (reply->is_empty() && !dis->wants_base_dir()) { - dout(7) << "they don't want the base dir" << endl; + dout(7) << "not adding unwanted base dir " << *curdir << endl; } else { - // add the dir. - assert(!curdir->auth_is_ambiguous()); + assert(!curdir->auth_is_ambiguous()); // would be frozen. reply->add_dir( new CDirDiscover( curdir, curdir->add_replica( dis->get_asker() ) ) ); dout(7) << "added dir " << *curdir << endl; @@ -3426,103 +3424,52 @@ void MDCache::handle_discover(MDiscover *dis) // lookup dentry CDentry *dn = curdir->lookup( dis->get_dentry(i) ); - /* - if (dn && !dn->can_read()) { // xlocked? - dout(7) << "waiting on " << *dn << endl; - cur->dir->add_waiter(CDir::WAIT_DNREAD, - dn->name, - new C_MDS_RetryMessage(mds, dis)); - return; - } - */ - if (dn) { - if (!dn->inode && dn->is_sync()) { - dout(7) << "mds" << whoami << " dentry " << dis->get_dentry(i) << " null in " << *curdir << ", returning error" << endl; - reply->set_flag_error_dn( dis->get_dentry(i) ); - break; // don't replicate null but non-locked dentries. - } - + // add dentry reply->add_dentry( dn->replicate_to( dis->get_asker() ) ); dout(7) << "added dentry " << *dn << endl; - if (!dn->inode) break; // we're done. - } - - // FIXME: hard links - if (dn && dn->inode) { - CInode *next = dn->inode; - assert(next->is_auth()); + if (!dn->is_primary()) break; // stop on null or remote link. - // add inode - //int nonce = next->cached_by_add(dis->get_asker()); - reply->add_inode( next->replicate_to( dis->get_asker() ) ); - dout(7) << "added inode " << *next << endl;// " nonce=" << nonce<< endl; + // add inode + CInode *next = dn->inode; + assert(next->is_auth()); + + reply->add_inode( next->replicate_to( dis->get_asker() ) ); + dout(7) << "added inode " << *next << endl; + + // descend, keep going. + cur = next; + continue; + } - // descend - cur = next; + // don't have dentry. + if (curdir->is_complete()) { + // set error flag in reply + dout(7) << "dname " << dis->get_dentry(i) << " dne in " << *curdir + << ", flagging error" << endl; + reply->set_flag_error_dn( dis->get_dentry(i) ); } else { - // don't have inode? - if (curdir->is_complete()) { - // set error flag in reply - dout(7) << "dname " << dis->get_dentry(i) << " dne in " << *curdir << ", returning error" << endl; - reply->set_flag_error_dn( dis->get_dentry(i) ); - break; + // readdir + dout(7) << "incomplete dir contents for " << *curdir << ", fetching" << endl; + + if (reply->is_empty()) { + // fetch and wait + curdir->fetch(new C_MDS_RetryMessage(mds, dis)); + return; } else { - // readdir - dout(7) << "incomplete dir contents for " << *curdir << ", fetching" << endl; - - if (reply->is_empty()) { - // fetch and wait - curdir->fetch(new C_MDS_RetryMessage(mds, dis)); - return; - } else { - // fetch, but send what we have so far - curdir->fetch(0); - break; - } + // fetch, but send what we have so far + curdir->fetch(0); } } + break; } - // how did we do. + // how did we do? if (reply->is_empty()) { - - // discard empty reply - delete reply; - dout(7) << "dropping this empty reply)." << endl; - - /* - if (cur->is_auth() && !cur->dir->is_auth()) { - // set hint - // fwd to dir auth - int dirauth = cur->dir->authority().first; - if (dirauth == dis->get_asker()) { - dout(7) << "from (new?) dir auth, dropping (obsolete) discover on floor." << endl; // XXX FIXME is this right? - //assert(dis->get_asker() == dis->get_source()); //might be a weird other loop. either way, asker has it. - delete dis; - } else { - dout(7) << "fwd to dir auth " << dirauth << endl; - mds->send_message_mds( dis, dirauth, MDS_PORT_CACHE ); - } - return; - }*/ - - /* - // wait for frozen dir? - if (cur->dir->is_frozen()) { - dout(7) << "waiting for frozen " << *cur->dir << endl; - cur->dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryMessage(mds, dis)); - return; - } else { - dout(7) << "i'm not auth, dropping request (+this empty reply)." << endl; - } - */ - //assert(0); - + delete reply; } else { - // send back to asker dout(7) << "sending result back to asker mds" << dis->get_asker() << endl; mds->send_message_mds(reply, dis->get_asker(), MDS_PORT_CACHE); } @@ -3658,32 +3605,19 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) assert(dn); if (in) { - dout(7) << "had " << *in << endl; - - // fix nonce - dout(7) << " my nonce is " << in->replica_nonce << ", taking from discover, which has " << m->get_inode(i).get_replica_nonce() << endl; + dout(7) << "had " << *in << ", new nonce " << m->get_inode(i).get_replica_nonce() << endl; in->replica_nonce = m->get_inode(i).get_replica_nonce(); - if (dn && in != dn->inode) { - dout(7) << " but it's not linked via dentry " << *dn << endl; - // link - if (dn->inode) { - dout(7) << "dentry WAS linked to " << *dn->inode << endl; - assert(0); // WTF. - } - dn->dir->link_inode(dn, in); - } + assert(in == dn->inode); // if we have it, it should be already linked to *dn. } else { - assert(dn->inode == 0); // better not be something else linked to this dentry... - // didn't have it. in = new CInode(this, false); - m->get_inode(i).update_inode(in); + add_inode( in ); // link in - add_inode( in ); + assert(dn->inode == 0); // better not be something else linked to this dentry. dn->dir->link_inode(dn, in); dout(7) << "added " << *in << " nonce " << in->replica_nonce << endl; diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index 0a8fac33962af..04868664ac6db 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -128,7 +128,7 @@ public: void Server::handle_client_mount(MClientMount *m) { - dout(3) << "mount by " << m->get_source() << endl; + dout(3) << "mount by " << m->get_source() << " oldv " << mds->clientmap.get_version() << endl; // journal it version_t cmapv = mds->clientmap.inc_projected(); @@ -138,7 +138,7 @@ void Server::handle_client_mount(MClientMount *m) void Server::handle_client_unmount(Message *m) { - dout(3) << "unmount by " << m->get_source() << endl; + dout(3) << "unmount by " << m->get_source() << " oldv " << mds->clientmap.get_version() << endl; // journal it version_t cmapv = mds->clientmap.inc_projected(); @@ -2424,8 +2424,9 @@ void Server::handle_client_openc(MClientRequest *req, CInode *diri) CDentry *dn = 0; // make dentry and inode, xlock dentry. - int r = prepare_mknod(req, diri, &dir, &in, &dn); - if (r < 0) + bool excl = (req->args.open.flags & O_EXCL); + int r = prepare_mknod(req, diri, &dir, &in, &dn, !excl); // okexist = !excl + if (r <= 0) return; // wait on something assert(dir); assert(in); diff --git a/branches/sage/cephmds2/messages/MDiscoverReply.h b/branches/sage/cephmds2/messages/MDiscoverReply.h index f47e3734c8f5e..3f2d1e6ef3cec 100644 --- a/branches/sage/cephmds2/messages/MDiscoverReply.h +++ b/branches/sage/cephmds2/messages/MDiscoverReply.h @@ -102,11 +102,7 @@ class MDiscoverReply : public Message { bool has_base_dir() { return !no_base_dir && dirs.size(); } bool has_base_dentry() { return !no_base_dentry && dentries.size(); } bool has_root() { - if (base_ino == 1) { - assert(no_base_dir && no_base_dentry); - return true; - } - return false; + return (base_ino == 1 && no_base_dir && no_base_dentry); } const string& get_path() { return path; } -- 2.39.5