From d40ae71dd8373802b0d341e6881f9472d0f50cb8 Mon Sep 17 00:00:00 2001 From: sageweil Date: Thu, 15 Mar 2007 04:40:11 +0000 Subject: [PATCH] * local unlink works, unless primary and nlink>1 git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1234 29311d96-e01e-0410-9327-a35deaab8ce9 --- .../sage/cephmds2/client/SyntheticClient.cc | 13 ++ branches/sage/cephmds2/mds/CInode.cc | 6 +- branches/sage/cephmds2/mds/MDCache.cc | 104 +++++++------ branches/sage/cephmds2/mds/MDCache.h | 15 +- branches/sage/cephmds2/mds/MDS.cc | 8 +- branches/sage/cephmds2/mds/Server.cc | 146 ++++++++++++++++-- branches/sage/cephmds2/mds/Server.h | 6 +- branches/sage/cephmds2/mds/events/EMetaBlob.h | 35 +++-- branches/sage/cephmds2/mds/journal.cc | 36 ++++- .../sage/cephmds2/messages/MDentryUnlink.h | 27 ++-- branches/sage/cephmds2/messages/MLock.h | 1 - 11 files changed, 289 insertions(+), 108 deletions(-) diff --git a/branches/sage/cephmds2/client/SyntheticClient.cc b/branches/sage/cephmds2/client/SyntheticClient.cc index 685ee26740bde..4da2e572d9ca1 100644 --- a/branches/sage/cephmds2/client/SyntheticClient.cc +++ b/branches/sage/cephmds2/client/SyntheticClient.cc @@ -1323,12 +1323,25 @@ void SyntheticClient::make_dir_mess(const char *basedir, int n) void SyntheticClient::foo() { + // link fun + /* client->mknod("one", 0755); client->mknod("two", 0755); client->link("one", "three"); client->mkdir("dir", 0755); client->link("two", "/dir/twolink"); client->link("dir/twolink", "four"); + */ + + // unlink fun + client->mknod("a", 0644); + client->unlink("a"); + client->mknod("b", 0644); + client->link("b", "c"); + client->unlink("c"); + client->mkdir("d", 0755); + client->unlink("d"); + client->rmdir("d"); } int SyntheticClient::thrash_links(const char *basedir, int dirs, int files, int depth) diff --git a/branches/sage/cephmds2/mds/CInode.cc b/branches/sage/cephmds2/mds/CInode.cc index 0d946a290fcdc..0b39fd27cc71e 100644 --- a/branches/sage/cephmds2/mds/CInode.cc +++ b/branches/sage/cephmds2/mds/CInode.cc @@ -393,9 +393,6 @@ void CInode::mark_clean() } } -// state - - @@ -425,6 +422,7 @@ void CInode::encode_hard_state(bufferlist& r) r.append((char*)&inode.uid, sizeof(inode.uid)); r.append((char*)&inode.gid, sizeof(inode.gid)); r.append((char*)&inode.ctime, sizeof(inode.ctime)); + r.append((char*)&inode.nlink, sizeof(inode.nlink)); } void CInode::decode_hard_state(bufferlist& r, int& off) @@ -437,6 +435,8 @@ void CInode::decode_hard_state(bufferlist& r, int& off) off += sizeof(inode.gid); r.copy(off, sizeof(inode.ctime), (char*)&inode.ctime); off += sizeof(inode.ctime); + r.copy(off, sizeof(inode.nlink), (char*)&inode.nlink); + off += sizeof(inode.nlink); } diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index fda6fd8ab5e1b..d5d86ca2f32a4 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -158,12 +158,6 @@ CInode *MDCache::create_inode() return in; } -void MDCache::destroy_inode(CInode *in) -{ - mds->idalloc->reclaim_id(in->ino()); - remove_inode(in); -} - void MDCache::add_inode(CInode *in) { @@ -1523,17 +1517,17 @@ public: * will be called by on unlink or rmdir * caller responsible for journaling an appropriate EUnlink or ERmdir */ -void MDCache::purge_inode(inode_t &inode) +void MDCache::purge_inode(inode_t *inode) { - dout(10) << "purge_inode " << inode.ino << " size " << inode.size << endl; + dout(10) << "purge_inode " << inode->ino << " size " << inode->size << endl; // take note - assert(purging.count(inode.ino) == 0); - purging[inode.ino] = inode; + assert(purging.count(inode->ino) == 0); + purging[inode->ino] = *inode; // remove - mds->filer->remove(inode, 0, inode.size, - 0, new C_MDC_PurgeFinish(this, inode.ino)); + mds->filer->remove(*inode, 0, inode->size, + 0, new C_MDC_PurgeFinish(this, inode->ino)); } void MDCache::purge_inode_finish(inodeno_t ino) @@ -1559,11 +1553,24 @@ void MDCache::purge_inode_finish_2(inodeno_t ino) finish_contexts(ls, 0); // reclaim ino? - + // hrm. +} + +void MDCache::add_recovered_purge(const inode_t& inode) +{ + assert(purging.count(inode.ino) == 0); + purging[inode.ino] = inode; +} + +void MDCache::remove_recovered_purge(inodeno_t ino) +{ + purging.erase(ino); } void MDCache::start_recovered_purges() { + dout(10) << "start_recovered_purges (" << purging.size() << " purges)" << endl; + for (map::iterator p = purging.begin(); p != purging.end(); ++p) { @@ -3912,6 +3919,37 @@ void MDCache::handle_dir_update(MDirUpdate *m) + +// UNLINK + +void MDCache::handle_dentry_unlink(MDentryUnlink *m) +{ + CDir *dir = get_dirfrag(m->get_dirfrag()); + + if (!dir) { + dout(7) << "handle_dentry_unlink don't have dirfrag " << m->get_dirfrag() << endl; + } + else { + CDentry *dn = dir->lookup(m->get_dn()); + if (!dn) { + dout(7) << "handle_dentry_unlink don't have dentry " << *dir << " dn " << m->get_dn() << endl; + } else { + dout(7) << "handle_dentry_unlink on " << *dn << endl; + + // unlink + dn->dir->unlink_inode(dn); + assert(dn->is_null()); + } + } + + delete m; + return; +} + + + +// OLD CRAP TO FOLLOW, will be trimmed as it's reimplemented in Server.cc + class C_MDC_DentryUnlink : public Context { public: MDCache *mdc; @@ -3931,7 +3969,6 @@ public: }; -// NAMESPACE FUN void MDCache::dentry_unlink(CDentry *dn, Context *c) { @@ -3955,7 +3992,8 @@ void MDCache::dentry_unlink(CDentry *dn, Context *c) it++) { dout(7) << "inode_unlink sending DentryUnlink to mds" << it->first << endl; - mds->send_message_mds(new MDentryUnlink(dir->ino(), dn->name), it->first, MDS_PORT_CACHE); + mds->send_message_mds(new MDentryUnlink(dir->dirfrag(), dn->name), + it->first, MDS_PORT_CACHE); } // don't need ack. @@ -4074,42 +4112,6 @@ void MDCache::dentry_unlink_finish(CDentry *dn, CDir *dir, Context *c) -void MDCache::handle_dentry_unlink(MDentryUnlink *m) -{ - CDir *dir = get_dir(m->get_dirino()); - - if (!dir) { - dout(7) << "handle_dentry_unlink don't have dir " << m->get_dirino() << endl; - } - else { - CDentry *dn = dir->lookup(m->get_dn()); - if (!dn) { - dout(7) << "handle_dentry_unlink don't have dentry " << *dir << " dn " << m->get_dn() << endl; - } else { - dout(7) << "handle_dentry_unlink on " << *dn << endl; - - // dir? - if (dn->inode) { - if (dn->inode->dir) { - dn->inode->dir->state_set(CDir::STATE_DELETED); - dn->inode->dir->remove_null_dentries(); - } - } - - string dname = dn->name; - - // unlink - dn->dir->remove_dentry(dn); - - // wake up - //dir->finish_waiting(CDir::WAIT_DNREAD, dname); - dir->take_waiting(CDir::WAIT_DNREAD, dname, mds->finished_queue); - } - } - - delete m; - return; -} void MDCache::handle_inode_unlink(MInodeUnlink *m) diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index 8d7189e8f2a38..66cb2ebac8e3f 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -277,9 +277,8 @@ public: CInode *create_inode(); void add_inode(CInode *in); - protected: void remove_inode(CInode *in); - void destroy_inode(CInode *in); + protected: void touch_inode(CInode *in) { if (in->get_parent_dn()) touch_dentry(in->get_parent_dn()); @@ -302,10 +301,18 @@ public: public: // inode purging - void purge_inode(inode_t& inode); + void purge_inode(inode_t *inode); void purge_inode_finish(inodeno_t ino); void purge_inode_finish_2(inodeno_t ino); - void waitfor_purge(inodeno_t ino, Context *c); + bool is_purging(inodeno_t ino) { + return purging.count(ino); + } + void wait_for_purge(inodeno_t ino, Context *c) { + waiting_for_purge[ino].push_back(c); + } + + void add_recovered_purge(const inode_t& inode); + void remove_recovered_purge(inodeno_t ino); void start_recovered_purges(); diff --git a/branches/sage/cephmds2/mds/MDS.cc b/branches/sage/cephmds2/mds/MDS.cc index 2c4265b812adf..71d61e25db2c5 100644 --- a/branches/sage/cephmds2/mds/MDS.cc +++ b/branches/sage/cephmds2/mds/MDS.cc @@ -515,7 +515,7 @@ void MDS::handle_mds_map(MMDSMap *m) // kick anchorclient (resent COMMITs) anchorclient->finish_recovery(); - // ... + mdcache->start_recovered_purges(); } dout(1) << "now active" << endl; @@ -800,12 +800,6 @@ void MDS::boot_replay(int step) break; case 5: - dout(2) << "boot_replay " << step << ": restarting any recovered purges" << endl; - mdcache->start_recovered_purges(); - - step++; // fall-thru - - case 6: // done with replay! if (mdsmap->get_num_mds(MDSMap::STATE_ACTIVE) == 0 && mdsmap->get_num_mds(MDSMap::STATE_STOPPING) == 0 && diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index dbe90c528454d..6ae44e9ec533b 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -29,6 +29,7 @@ #include "messages/MLock.h" +#include "messages/MDentryUnlink.h" #include "messages/MInodeLink.h" #include "events/EString.h" @@ -1393,8 +1394,8 @@ void Server::link_remote(MClientRequest *req, CInode *ref, // 3. send LinkCommit to dest (unlocks target on dest, journals commit) // IMPLEMENT ME - MClientReply *reply = new MClientReply(req, -EAGAIN); - reply_request(req, reply, dn->get_dir()->get_inode()); // FIXME: imprecise ref + MClientReply *reply = new MClientReply(req, -EXDEV); + reply_request(req, reply, dn->get_dir()->get_inode()); } @@ -1569,6 +1570,18 @@ void Server::handle_client_unlink(MClientRequest *req, } dout(7) << "handle_client_unlink/rmdir on " << *in << endl; + + // treat this like a rename? + if (dn->is_primary() && // primary link, and + (in->inode.nlink > 1 || // there are other hard links, or + in->get_caps_wanted())) { // file is open (FIXME need better condition here) + // treat as a rename into the dangledir. + + // IMPLEMENT ME **** FIXME **** + MClientReply *reply = new MClientReply(req, -EXDEV); + reply_request(req, reply, dn->get_dir()->get_inode()); + return; + } // xlock dentry if (!mds->locker->dentry_xlock_start(dn, req, diri)) @@ -1578,37 +1591,140 @@ void Server::handle_client_unlink(MClientRequest *req, // ok! if (dn->is_remote() && !dn->inode->is_auth()) - _unlink_remote(req, dn); + _unlink_remote(req, dn, in); else - _unlink_local(req, dn); + _unlink_local(req, dn, in); } -void Server::_unlink_local(MClientRequest *req, CDentry *dn) + +class C_MDS_unlink_local_finish : public Context { + MDS *mds; + MClientRequest *req; + CDentry *dn; + CInode *in; + version_t ipv; + time_t ictime; + version_t dpv; +public: + C_MDS_unlink_local_finish(MDS *m, MClientRequest *r, CDentry *d, CInode *i, + version_t v, time_t ct) : + mds(m), req(r), dn(d), in(i), + ipv(v), ictime(ct), + dpv(d->get_projected_version()) { } + void finish(int r) { + assert(r == 0); + mds->server->_unlink_local_finish(req, dn, in, ipv, ictime, dpv); + } +}; + + +void Server::_unlink_local(MClientRequest *req, CDentry *dn, CInode *in) { + dout(10) << "_unlink_local " << *dn << endl; + + // if we're not the only link, wrlock the target (we need to nlink--) + if (in->inode.nlink > 1) { + assert(dn->is_remote()); // unlinking primary is handled like a rename.. not here - /* - // it's locked, unlink! - MClientReply *reply = new MClientReply(req,0); - mdcache->dentry_unlink(dn, - new C_MDS_CommitRequest(this, req, reply, diri, - new EString("unlink fixme"))); - */ + dout(10) << "_unlink_local nlink>1, wrlocking " << *in << endl; + if (!mds->locker->inode_hard_write_start(in, req)) + return; // fw or (wait for) lock + } + + // ok, let's do it. + // prepare log entry + EUpdate *le = new EUpdate("unlink_local"); + + // predirty + version_t ipv = in->pre_dirty(); + if (dn->is_remote()) + dn->pre_dirty(); // predirty dentry too + + // the unlinked dentry + le->metablob.add_dir_context(dn->get_dir()); + le->metablob.add_null_dentry(dn, true); + + // remote inode nlink--? + inode_t *pi = 0; + if (dn->is_remote()) { + le->metablob.add_dir_context(in->get_parent_dir()); + pi = le->metablob.add_dentry(in->parent, true, in); + + // update journaled target inode + pi->nlink--; + pi->ctime = g_clock.gettime(); + pi->version = ipv; + } else { + le->metablob.add_destroyed_inode(in->inode); + } + + // finisher + C_MDS_unlink_local_finish *fin = new C_MDS_unlink_local_finish(mds, req, dn, in, + ipv, pi ? pi->ctime:0); + + // log + wait + mdlog->submit_entry(le); + mdlog->wait_for_sync(fin); } void Server::_unlink_local_finish(MClientRequest *req, - CDentry *dn, CInode *targeti, - version_t, time_t, version_t) + CDentry *dn, CInode *in, + version_t ipv, time_t ictime, version_t dpv) { + dout(10) << "_unlink_local " << *dn << endl; + // update remote inode? + if (dn->is_remote()) { + assert(ipv); + assert(ictime); + in->inode.ctime = ictime; + in->inode.nlink--; + in->mark_dirty(ipv); + // unlock inode (and share nlink news w/ replicas) + mds->locker->inode_hard_write_finish(in); + } + + // unlink inode (dn now null) + CDir *dir = dn->dir; + dn->mark_dirty(dpv); + dir->unlink_inode(dn); + + // share unlink news with replicas + for (map::iterator it = dn->replicas_begin(); + it != dn->replicas_end(); + it++) { + dout(7) << "_unlink_local_finish sending MDentryUnlink to mds" << it->first << endl; + mds->send_message_mds(new MDentryUnlink(dir->dirfrag(), dn->name), it->first, MDS_PORT_CACHE); + } + + // unlock (now null) dn + mds->locker->dentry_xlock_finish(dn); + + // purge+remove inode? + if (in->inode.nlink == 0) { + mdcache->purge_inode(&in->inode); + mdcache->remove_inode(in); + } + + // bump target popularity + mds->balancer->hit_dir(dir, META_POP_DWR); + + // reply + MClientReply *reply = new MClientReply(req, 0); + reply_request(req, reply, dir->get_inode()); // FIXME: imprecise ref } -void Server::_unlink_remote(MClientRequest *req, CDentry *dn) +void Server::_unlink_remote(MClientRequest *req, CDentry *dn, CInode *in) { + + // IMPLEMENT ME + MClientReply *reply = new MClientReply(req, -EXDEV); + reply_request(req, reply, dn->get_dir()->get_inode()); } diff --git a/branches/sage/cephmds2/mds/Server.h b/branches/sage/cephmds2/mds/Server.h index 01452702ac540..d70a5dbeea760 100644 --- a/branches/sage/cephmds2/mds/Server.h +++ b/branches/sage/cephmds2/mds/Server.h @@ -107,11 +107,11 @@ public: // unlink void handle_client_unlink(MClientRequest *req, CInode *ref); bool _verify_rmdir(MClientRequest *req, CInode *ref, CInode *rmdiri); - void _unlink_local(MClientRequest *req, CDentry *dn); + void _unlink_local(MClientRequest *req, CDentry *dn, CInode *in); void _unlink_local_finish(MClientRequest *req, - CDentry *dn, CInode *targeti, + CDentry *dn, CInode *in, version_t, time_t, version_t); - void _unlink_remote(MClientRequest *req, CDentry *dn); + void _unlink_remote(MClientRequest *req, CDentry *dn, CInode *in); // rename void handle_client_rename(MClientRequest *req, CInode *ref); diff --git a/branches/sage/cephmds2/mds/events/EMetaBlob.h b/branches/sage/cephmds2/mds/events/EMetaBlob.h index 9166167c81ddb..63e1461e06c3b 100644 --- a/branches/sage/cephmds2/mds/events/EMetaBlob.h +++ b/branches/sage/cephmds2/mds/events/EMetaBlob.h @@ -212,13 +212,34 @@ class EMetaBlob { // anchor transactions included in this update. list atids; + // inodes i've destroyed. + list destroyed_inodes; + public: void add_anchor_transaction(version_t atid) { atids.push_back(atid); } + + void add_destroyed_inode(const inode_t& inode) { + destroyed_inodes.push_back(inode); + } - // remote pointer to to-be-journaled inode iff it's a normal (non-remote) dentry + void add_null_dentry(CDentry *dn, bool dirty) { + dirlump& lump = add_dir(dn->get_dir(), false); + + lump.nnull++; + if (dirty) + lump.get_dnull().push_front(nullbit(dn->get_name(), + dn->get_projected_version(), + dirty)); + else + lump.get_dnull().push_back(nullbit(dn->get_name(), + dn->get_projected_version(), + dirty)); + } + + // return remote pointer to to-be-journaled inode iff it's a normal (non-remote) dentry inode_t *add_dentry(CDentry *dn, bool dirty, CInode *in=0) { CDir *dir = dn->get_dir(); if (!in) in = dn->get_inode(); @@ -241,15 +262,7 @@ class EMetaBlob { dirty)); } else if (!in) { - lump.nnull++; - if (dirty) - lump.get_dnull().push_front(nullbit(dn->get_name(), - dn->get_projected_version(), - dirty)); - else - lump.get_dnull().push_back(nullbit(dn->get_name(), - dn->get_projected_version(), - dirty)); + add_null_dentry(dn, dirty); } else { lump.nfull++; @@ -311,6 +324,7 @@ class EMetaBlob { lump_map[*i]._encode(bl); } ::_encode(atids, bl); + ::_encode(destroyed_inodes, bl); } void _decode(bufferlist& bl, int& off) { int n; @@ -324,6 +338,7 @@ class EMetaBlob { lump_map[dirfrag]._decode(bl, off); } ::_decode(atids, bl, off); + ::_decode(destroyed_inodes, bl, off); } void print(ostream& out) const { diff --git a/branches/sage/cephmds2/mds/journal.cc b/branches/sage/cephmds2/mds/journal.cc index 6ffdf8b68a3db..00a9655726a38 100644 --- a/branches/sage/cephmds2/mds/journal.cc +++ b/branches/sage/cephmds2/mds/journal.cc @@ -132,8 +132,18 @@ bool EMetaBlob::has_expired(MDS *mds) return false; } } + + // destroyed inodes + for (list::iterator p = destroyed_inodes.begin(); + p != destroyed_inodes.end(); + ++p) { + if (mds->mdcache->is_purging(p->ino)) { + dout(10) << "EMetaBlob.has_expired still purging destroyed inode " << p->ino << endl; + return false; + } + } - return true; // all dirlumps expired. + return true; // all dirlumps expired, etc. } @@ -220,6 +230,17 @@ void EMetaBlob::expire(MDS *mds, Context *c) mds->anchorclient->wait_for_ack(*p, gather->new_sub()); } } + + // destroyed inodes + for (list::iterator p = destroyed_inodes.begin(); + p != destroyed_inodes.end(); + ++p) { + if (mds->mdcache->is_purging(p->ino)) { + dout(10) << "EMetaBlob.expire waiting for purge of destroyed inode " << p->ino << endl; + mds->mdcache->wait_for_purge(p->ino, gather->new_sub()); + } + } + } void EMetaBlob::replay(MDS *mds) @@ -323,13 +344,21 @@ void EMetaBlob::replay(MDS *mds) } } + // anchor transactions for (list::iterator p = atids.begin(); p != atids.end(); ++p) { dout(10) << "EMetaBlob.replay noting anchor transaction " << *p << endl; mds->anchorclient->got_journaled_agree(*p); } - + + // destroyed inodes + for (list::iterator p = destroyed_inodes.begin(); + p != destroyed_inodes.end(); + ++p) { + dout(10) << "EMetaBlob.replay will purge destroyed inode " << p->ino << endl; + mds->mdcache->add_recovered_purge(*p); + } } // ----------------------- @@ -662,10 +691,13 @@ bool EPurgeFinish::has_expired(MDS *mds) void EPurgeFinish::expire(MDS *mds, Context *c) { + assert(0); } void EPurgeFinish::replay(MDS *mds) { + dout(10) << "EPurgeFinish.replay " << ino << endl; + mds->mdcache->remove_recovered_purge(ino); } diff --git a/branches/sage/cephmds2/messages/MDentryUnlink.h b/branches/sage/cephmds2/messages/MDentryUnlink.h index 17dd173830527..c46bdb0ad2571 100644 --- a/branches/sage/cephmds2/messages/MDentryUnlink.h +++ b/branches/sage/cephmds2/messages/MDentryUnlink.h @@ -16,29 +16,32 @@ #define __MDENTRYUNLINK_H class MDentryUnlink : public Message { - inodeno_t dirino; + dirfrag_t dirfrag; string dn; public: - inodeno_t get_dirino() { return dirino; } + dirfrag_t get_dirfrag() { return dirfrag; } string& get_dn() { return dn; } MDentryUnlink() {} - MDentryUnlink(inodeno_t dirino, string& dn) : - Message(MSG_MDS_DENTRYUNLINK) { - this->dirino = dirino; - this->dn = dn; + MDentryUnlink(dirfrag_t df, string& n) : + Message(MSG_MDS_DENTRYUNLINK), + dirfrag(df), + dn(n) { } + + char *get_type_name() { return "dentry_unlink";} + void print(ostream& o) { + o << "dentry_unlink(" << dirfrag << " " << dn << ")"; } - virtual char *get_type_name() { return "Dun";} - virtual void decode_payload() { + void decode_payload() { int off = 0; - payload.copy(off, sizeof(dirino), (char*)&dirino); - off += sizeof(dirino); + payload.copy(off, sizeof(dirfrag), (char*)&dirfrag); + off += sizeof(dirfrag); ::_decode(dn, payload, off); } - virtual void encode_payload() { - payload.append((char*)&dirino,sizeof(dirino)); + void encode_payload() { + payload.append((char*)&dirfrag,sizeof(dirfrag)); ::_encode(dn, payload); } }; diff --git a/branches/sage/cephmds2/messages/MLock.h b/branches/sage/cephmds2/messages/MLock.h index df3f413f76303..804cb272bb031 100644 --- a/branches/sage/cephmds2/messages/MLock.h +++ b/branches/sage/cephmds2/messages/MLock.h @@ -31,7 +31,6 @@ #define LOCK_AC_REQXLOCKNAK 10 // req dentry xlock #define LOCK_AC_LOCKNAK 12 // for dentry xlock - #define LOCK_AC_FOR_REPLICA(a) ((a) <= 10) #define LOCK_AC_FOR_AUTH(a) ((a) >= 11) -- 2.39.5