From 6c46386e9866fb42a0ec17ec8dd6fa73af4fd62e Mon Sep 17 00:00:00 2001 From: sageweil Date: Tue, 3 Apr 2007 03:54:11 +0000 Subject: [PATCH] * hardlock broken into auth, link, and dirfragtree locks git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1334 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/client/Client.cc | 7 +- branches/sage/cephmds2/include/types.h | 30 ++--- branches/sage/cephmds2/mds/CInode.cc | 105 ++++++++-------- branches/sage/cephmds2/mds/CInode.h | 115 ++++++++---------- branches/sage/cephmds2/mds/Locker.cc | 20 ++- branches/sage/cephmds2/mds/MDCache.cc | 37 +++--- branches/sage/cephmds2/mds/Migrator.cc | 44 ++++--- branches/sage/cephmds2/mds/Server.cc | 10 +- branches/sage/cephmds2/mds/SimpleLock.h | 44 +++++-- .../sage/cephmds2/messages/MClientReply.h | 12 +- .../cephmds2/messages/MMDSCacheRejoinAck.h | 13 +- 11 files changed, 239 insertions(+), 198 deletions(-) diff --git a/branches/sage/cephmds2/client/Client.cc b/branches/sage/cephmds2/client/Client.cc index 66c123253ca24..65aca4101db39 100644 --- a/branches/sage/cephmds2/client/Client.cc +++ b/branches/sage/cephmds2/client/Client.cc @@ -1679,11 +1679,12 @@ int Client::lstatlite(const char *relpath, struct statlite *stl) tout << path << endl; // make mask - int mask = INODE_MASK_BASE | INODE_MASK_PERM; + // FIXME. 
+ int mask = INODE_MASK_BASE | INODE_MASK_AUTH; if (S_ISVALIDSIZE(stl->st_litemask) || S_ISVALIDBLOCKS(stl->st_litemask)) mask |= INODE_MASK_SIZE; - if (S_ISVALIDMTIME(stl->st_litemask)) mask |= INODE_MASK_MTIME; - if (S_ISVALIDATIME(stl->st_litemask)) mask |= INODE_MASK_ATIME; + if (S_ISVALIDMTIME(stl->st_litemask)) mask |= INODE_MASK_FILE; + if (S_ISVALIDATIME(stl->st_litemask)) mask |= INODE_MASK_FILE; Inode *in = 0; int res = _lstat(path, mask, &in); diff --git a/branches/sage/cephmds2/include/types.h b/branches/sage/cephmds2/include/types.h index 214809779f849..194d8e2c0f072 100644 --- a/branches/sage/cephmds2/include/types.h +++ b/branches/sage/cephmds2/include/types.h @@ -224,15 +224,18 @@ namespace __gnu_cxx { #define FILE_MODE_RW (1|2) #define FILE_MODE_LAZY 4 -#define INODE_MASK_BASE 1 // ino, nlink -#define INODE_MASK_PERM 2 // uid, gid, mode -#define INODE_MASK_SIZE 4 // size, blksize, blocks -#define INODE_MASK_CTIME 8 // ctime -#define INODE_MASK_MTIME 16 // mtime -#define INODE_MASK_ATIME 32 // atime +#define INODE_MASK_BASE 1 // ino, layout, symlink value +#define INODE_MASK_AUTH 2 // uid, gid, mode +#define INODE_MASK_LINK 4 // nlink, anchored +#define INODE_MASK_FILE 8 // mtime, size. +// atime? -#define INODE_MASK_ALL_STAT (INODE_MASK_BASE|INODE_MASK_PERM|INODE_MASK_SIZE|INODE_MASK_MTIME) -//#define INODE_MASK_ALL_STAT (INODE_MASK_BASE|INODE_MASK_PERM|INODE_MASK_SIZE|INODE_MASK_MTIME|INODE_MASK_ATIME) +#define INODE_MASK_ALL_STAT (INODE_MASK_BASE|INODE_MASK_AUTH|INODE_MASK_LINK|INODE_MASK_FILE) + +#define INODE_MASK_SIZE INODE_MASK_FILE // size, blksize, blocks +#define INODE_MASK_MTIME INODE_MASK_FILE // mtime +#define INODE_MASK_ATIME INODE_MASK_FILE // atime +#define INODE_MASK_CTIME (INODE_MASK_FILE|INODE_MASK_AUTH|INODE_MASK_LINK) // ctime struct inode_t { // base (immutable) @@ -242,23 +245,22 @@ struct inode_t { // affected by any inode change... utime_t ctime; // inode change time - // nlink - int nlink; - bool anchored; // auth only? 
- // perm (namespace permissions) mode_t mode; uid_t uid; gid_t gid; + // nlink + int nlink; + bool anchored; // auth only? + // file (data access) off_t size; utime_t mtime; // file data modify time. utime_t atime; // file data access time. - int mask; - // special stuff + int mask; // used for client stat. hack. version_t version; // auth only version_t file_data_version; // auth only diff --git a/branches/sage/cephmds2/mds/CInode.cc b/branches/sage/cephmds2/mds/CInode.cc index 3b4ef6b41c453..7496852637c48 100644 --- a/branches/sage/cephmds2/mds/CInode.cc +++ b/branches/sage/cephmds2/mds/CInode.cc @@ -60,9 +60,11 @@ ostream& operator<<(ostream& out, CInode& in) out << " v" << in.get_version(); - out << " hard=" << in.hardlock; + out << " auth=" << in.authlock; + out << " link=" << in.linklock; + out << " dft=" << in.dirfragtreelock; out << " file=" << in.filelock; - + if (in.get_num_ref()) { out << " |"; in.print_pin_set(out); @@ -92,8 +94,11 @@ void CInode::print(ostream& out) // ====== CInode ======= -CInode::CInode(MDCache *c, bool auth) : hardlock(this, LOCK_OTYPE_IHARD, WAIT_HARDLOCK_OFFSET), - filelock(this, LOCK_OTYPE_IFILE, WAIT_FILELOCK_OFFSET) +CInode::CInode(MDCache *c, bool auth) : + authlock(this, LOCK_OTYPE_IAUTH, WAIT_AUTHLOCK_OFFSET), + linklock(this, LOCK_OTYPE_ILINK, WAIT_LINKLOCK_OFFSET), + dirfragtreelock(this, LOCK_OTYPE_IDIRFRAGTREE, WAIT_DIRFRAGTREELOCK_OFFSET), + filelock(this, LOCK_OTYPE_IFILE, WAIT_FILELOCK_OFFSET) { mdcache = c; @@ -288,7 +293,6 @@ void CInode::name_stray_dentry(string& dname) } - version_t CInode::pre_dirty() { assert(parent); @@ -328,6 +332,7 @@ void CInode::mark_dirty(version_t pv) { parent->mark_dirty(pv); } + void CInode::mark_clean() { dout(10) << " mark_clean " << *this << endl; @@ -350,12 +355,27 @@ void CInode::set_mlock_info(MLock *m) void CInode::encode_lock_state(int type, bufferlist& bl) { switch (type) { - case LOCK_OTYPE_IFILE: - encode_file_state(bl); + case LOCK_OTYPE_IAUTH: + ::_encode(inode.mode, bl); 
+ ::_encode(inode.uid, bl); + ::_encode(inode.gid, bl); + break; + + case LOCK_OTYPE_ILINK: + ::_encode(inode.nlink, bl); + ::_encode(inode.anchored, bl); break; - case LOCK_OTYPE_IHARD: - encode_hard_state(bl); + + case LOCK_OTYPE_IDIRFRAGTREE: + dirfragtree._encode(bl); + break; + + case LOCK_OTYPE_IFILE: + ::_encode(inode.size, bl); + ::_encode(inode.mtime, bl); + ::_encode(inode.atime, bl); break; + default: assert(0); } @@ -365,61 +385,32 @@ void CInode::decode_lock_state(int type, bufferlist& bl) { int off = 0; switch (type) { - case LOCK_OTYPE_IFILE: - decode_file_state(bl, off); + case LOCK_OTYPE_IAUTH: + ::_decode(inode.mode, bl, off); + ::_decode(inode.uid, bl, off); + ::_decode(inode.gid, bl, off); break; - case LOCK_OTYPE_IHARD: - decode_hard_state(bl, off); - break; - default: - assert(0); - } -} - + case LOCK_OTYPE_ILINK: + ::_decode(inode.nlink, bl, off); + ::_decode(inode.anchored, bl, off); + break; -// new state encoders - -void CInode::encode_file_state(bufferlist& bl) -{ - bl.append((char*)&inode.size, sizeof(inode.size)); - bl.append((char*)&inode.mtime, sizeof(inode.mtime)); - bl.append((char*)&inode.atime, sizeof(inode.atime)); // ?? 
-} - -void CInode::decode_file_state(bufferlist& r, int& off) -{ - r.copy(off, sizeof(inode.size), (char*)&inode.size); - off += sizeof(inode.size); - r.copy(off, sizeof(inode.mtime), (char*)&inode.mtime); - off += sizeof(inode.mtime); - r.copy(off, sizeof(inode.atime), (char*)&inode.atime); - off += sizeof(inode.atime); -} + case LOCK_OTYPE_IDIRFRAGTREE: + dirfragtree._decode(bl, off); + break; + case LOCK_OTYPE_IFILE: + ::_decode(inode.size, bl, off); + ::_decode(inode.mtime, bl, off); + ::_decode(inode.atime, bl, off); + break; -void CInode::encode_hard_state(bufferlist& r) -{ - r.append((char*)&inode.mode, sizeof(inode.mode)); - r.append((char*)&inode.uid, sizeof(inode.uid)); - r.append((char*)&inode.gid, sizeof(inode.gid)); - r.append((char*)&inode.ctime, sizeof(inode.ctime)); - r.append((char*)&inode.nlink, sizeof(inode.nlink)); + default: + assert(0); + } } -void CInode::decode_hard_state(bufferlist& r, int& off) -{ - r.copy(off, sizeof(inode.mode), (char*)&inode.mode); - off += sizeof(inode.mode); - r.copy(off, sizeof(inode.uid), (char*)&inode.uid); - off += sizeof(inode.uid); - r.copy(off, sizeof(inode.gid), (char*)&inode.gid); - off += sizeof(inode.gid); - r.copy(off, sizeof(inode.ctime), (char*)&inode.ctime); - off += sizeof(inode.ctime); - r.copy(off, sizeof(inode.nlink), (char*)&inode.nlink); - off += sizeof(inode.nlink); -} diff --git a/branches/sage/cephmds2/mds/CInode.h b/branches/sage/cephmds2/mds/CInode.h index d40aa2190359c..4f600f6cfb963 100644 --- a/branches/sage/cephmds2/mds/CInode.h +++ b/branches/sage/cephmds2/mds/CInode.h @@ -102,8 +102,10 @@ class CInode : public MDSCacheObject { static const int WAIT_UNLINK = (1<<7); // as in remotely nlink-- static const int WAIT_CAPS = (1<<8); - static const int WAIT_HARDLOCK_OFFSET = 9; - static const int WAIT_FILELOCK_OFFSET = 17; + static const int WAIT_AUTHLOCK_OFFSET = 9; + static const int WAIT_LINKLOCK_OFFSET = 9 + SimpleLock::WAIT_BITS; + static const int WAIT_DIRFRAGTREELOCK_OFFSET = 9 + 
2*SimpleLock::WAIT_BITS; + static const int WAIT_FILELOCK_OFFSET = 9 + 3*SimpleLock::WAIT_BITS; static const int WAIT_ANY = 0xffffffff; @@ -208,13 +210,6 @@ protected: void name_stray_dentry(string& dname); - // -- state encoding -- - void encode_file_state(bufferlist& r); - void decode_file_state(bufferlist& r, int& off); - - void encode_hard_state(bufferlist& r); - void decode_hard_state(bufferlist& r, int& off); - // -- dirtyness -- version_t get_version() { return inode.version; } @@ -236,13 +231,17 @@ protected: // -- locks -- public: - SimpleLock hardlock; + SimpleLock authlock; + SimpleLock linklock; + SimpleLock dirfragtreelock; FileLock filelock; SimpleLock* get_lock(int type) { switch (type) { case LOCK_OTYPE_IFILE: return &filelock; - case LOCK_OTYPE_IHARD: return &hardlock; + case LOCK_OTYPE_IAUTH: return &authlock; + case LOCK_OTYPE_ILINK: return &linklock; + case LOCK_OTYPE_IDIRFRAGTREE: return &dirfragtreelock; default: assert(0); } } @@ -332,24 +331,16 @@ public: void replicate_relax_locks() { + dout(10) << " relaxing locks on " << *this << endl; assert(is_auth()); assert(!is_replicated()); - dout(10) << " relaxing locks on " << *this << endl; - if (hardlock.get_state() == LOCK_LOCK && - !hardlock.is_used()) { - dout(10) << " hard now sync " << *this << endl; - hardlock.set_state(LOCK_SYNC); - } - if (filelock.get_state() == LOCK_LOCK) { - if (!filelock.is_used() && - (get_caps_issued() & CAP_FILE_WR) == 0) { - filelock.set_state(LOCK_SYNC); - dout(10) << " file now sync " << *this << endl; - } else { - dout(10) << " can't relax filelock on " << *this << endl; - } - } + authlock.replicate_relax(); + linklock.replicate_relax(); + dirfragtreelock.replicate_relax(); + + if ((get_caps_issued() & (CAP_FILE_WR|CAP_FILE_WRBUFFER)) == 0) /* relax filelock only when neither WR nor WRBUFFER caps are issued; parens needed because == binds tighter than & */ + filelock.replicate_relax(); } @@ -449,7 +440,9 @@ class CInodeDiscover { int replica_nonce; - int hardlock_state; + int authlock_state; + int linklock_state; + int dirfragtreelock_state; int filelock_state; public: @@ 
-461,7 +454,9 @@ class CInodeDiscover { replica_nonce = nonce; - hardlock_state = in->hardlock.get_replica_state(); + authlock_state = in->authlock.get_replica_state(); + linklock_state = in->linklock.get_replica_state(); + dirfragtreelock_state = in->dirfragtreelock.get_replica_state(); filelock_state = in->filelock.get_replica_state(); } @@ -474,30 +469,32 @@ class CInodeDiscover { in->dirfragtree = dirfragtree; in->replica_nonce = replica_nonce; - in->hardlock.set_state(hardlock_state); + in->authlock.set_state(authlock_state); + in->linklock.set_state(linklock_state); + in->dirfragtreelock.set_state(dirfragtreelock_state); in->filelock.set_state(filelock_state); } void _encode(bufferlist& bl) { - bl.append((char*)&inode, sizeof(inode)); + ::_encode(inode, bl); ::_encode(symlink, bl); dirfragtree._encode(bl); - bl.append((char*)&replica_nonce, sizeof(replica_nonce)); - bl.append((char*)&hardlock_state, sizeof(hardlock_state)); - bl.append((char*)&filelock_state, sizeof(filelock_state)); + ::_encode(replica_nonce, bl); + ::_encode(authlock_state, bl); + ::_encode(linklock_state, bl); + ::_encode(dirfragtreelock_state, bl); + ::_encode(filelock_state, bl); } void _decode(bufferlist& bl, int& off) { - bl.copy(off,sizeof(inode), (char*)&inode); - off += sizeof(inode); + ::_decode(inode, bl, off); ::_decode(symlink, bl, off); dirfragtree._decode(bl, off); - bl.copy(off, sizeof(replica_nonce), (char*)&replica_nonce); - off += sizeof(replica_nonce); - bl.copy(off, sizeof(hardlock_state), (char*)&hardlock_state); - off += sizeof(hardlock_state); - bl.copy(off, sizeof(filelock_state), (char*)&filelock_state); - off += sizeof(filelock_state); + ::_decode(replica_nonce, bl, off); + ::_decode(authlock_state, bl, off); + ::_decode(linklock_state, bl, off); + ::_decode(dirfragtreelock_state, bl, off); + ::_decode(filelock_state, bl, off); } }; @@ -507,7 +504,7 @@ class CInodeDiscover { class CInodeExport { - struct { + struct st_ { inode_t inode; meta_load_t 
popularity_justme; @@ -523,9 +520,7 @@ class CInodeExport { map replicas; map cap_map; - bufferlist hardlock; - bufferlist filelock; - //int remaining_issued; + bufferlist locks; public: CInodeExport() {} @@ -537,8 +532,10 @@ public: st.is_dirty = in->is_dirty(); replicas = in->replicas; - in->hardlock._encode(hardlock); - in->filelock._encode(filelock); + in->authlock._encode(locks); + in->linklock._encode(locks); + in->dirfragtreelock._encode(locks); + in->filelock._encode(locks); st.popularity_justme.take( in->popularity[MDS_POP_JUSTME] ); st.popularity_curdom.take( in->popularity[MDS_POP_CURDOM] ); @@ -572,9 +569,10 @@ public: in->get(CInode::PIN_REPLICATED); int off = 0; - in->hardlock._decode(hardlock, off); - off = 0; - in->filelock._decode(filelock, off); + in->authlock._decode(locks, off); + in->linklock._decode(locks, off); + in->dirfragtreelock._decode(locks, off); + in->filelock._decode(locks, off); // caps in->merge_client_caps(cap_map, new_client_caps); @@ -582,15 +580,12 @@ public: void _encode(bufferlist& bl) { st.num_caps = cap_map.size(); - bl.append((char*)&st, sizeof(st)); + + ::_encode(st, bl); ::_encode(symlink, bl); dirfragtree._encode(bl); - - // cached_by + nonce ::_encode(replicas, bl); - - ::_encode(hardlock, bl); - ::_encode(filelock, bl); + ::_encode(locks, bl); // caps for (map::iterator it = cap_map.begin(); @@ -602,15 +597,11 @@ public: } int _decode(bufferlist& bl, int off = 0) { - bl.copy(off, sizeof(st), (char*)&st); - off += sizeof(st); + ::_decode(st, bl, off); ::_decode(symlink, bl, off); dirfragtree._decode(bl, off); - ::_decode(replicas, bl, off); - - ::_decode(hardlock, bl, off); - ::_decode(filelock, bl, off); + ::_decode(locks, bl, off); // caps for (int i=0; iget_inode(m->get_ino()); @@ -701,8 +703,14 @@ void Locker::handle_lock(MLock *m) return; } switch (m->get_otype()) { - case LOCK_OTYPE_IHARD: - handle_simple_lock(&in->hardlock, m); + case LOCK_OTYPE_IAUTH: + handle_simple_lock(&in->authlock, m); + break; + case 
LOCK_OTYPE_ILINK: + handle_simple_lock(&in->linklock, m); + break; + case LOCK_OTYPE_IDIRFRAGTREE: + handle_simple_lock(&in->dirfragtreelock, m); break; case LOCK_OTYPE_IFILE: handle_file_lock(&in->filelock, m); @@ -1043,6 +1051,12 @@ void Locker::simple_xlock_finish(SimpleLock *lock, MDRequest *mdr) mdr->locks.erase(lock); dout(7) << "simple_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl; + // slave? + if (!lock->get_parent()->is_auth()) { + mds->send_message_mds(new MLock(lock, LOCK_AC_UNXLOCK, mds->get_nodeid()), + lock->get_parent()->authority().first, MDS_PORT_LOCKER); + } + // others waiting? if (lock->is_waiter_for(SimpleLock::WAIT_WR)) { // wake 'em up diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index ad41a40d890a4..05dc475781a65 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -1488,11 +1488,16 @@ void MDCache::handle_cache_rejoin(MMDSCacheRejoin *m) in->mds_caps_wanted[from] = p->second; else in->mds_caps_wanted.erase(from); - in->hardlock.remove_gather(from); // just in case + in->authlock.remove_gather(from); // just in case + in->linklock.remove_gather(from); // just in case + in->dirfragtreelock.remove_gather(from); // just in case in->filelock.remove_gather(from); // just in case dout(10) << " has " << *in << endl; ack->add_inode(p->first, - in->hardlock.get_replica_state(), in->filelock.get_replica_state(), + in->authlock.get_replica_state(), + in->linklock.get_replica_state(), + in->dirfragtreelock.get_replica_state(), + in->filelock.get_replica_state(), nonce); } @@ -1537,7 +1542,9 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoinAck *m) CInode *in = get_inode(p->ino); assert(in); in->set_replica_nonce(p->nonce); - in->hardlock.set_state(p->hardlock); + in->authlock.set_state(p->authlock); + in->linklock.set_state(p->linklock); + in->dirfragtreelock.set_state(p->dirfragtreelock); in->filelock.set_state(p->filelock); dout(10) 
<< " got " << *in << endl; } @@ -2147,20 +2154,20 @@ void MDCache::inode_remove_replica(CInode *in, int from) // note: this code calls _eval more often than it needs to! // fix lock - if (in->hardlock.is_gathering(from)) { - in->hardlock.remove_gather(from); - if (!in->hardlock.is_gathering()) - mds->locker->simple_eval(&in->hardlock); - } - if (in->filelock.is_gathering(from)) { - in->filelock.remove_gather(from); - if (!in->filelock.is_gathering()) - mds->locker->file_eval(&in->filelock); - } + if (in->authlock.remove_replica(from)) + mds->locker->simple_eval(&in->authlock); + if (in->linklock.remove_replica(from)) + mds->locker->simple_eval(&in->linklock); + if (in->dirfragtreelock.remove_replica(from)) + mds->locker->simple_eval(&in->dirfragtreelock); + if (in->filelock.remove_replica(from)) + mds->locker->file_eval(&in->filelock); /* filelock keeps using file_eval, matching the replaced code and the "alone now?" path */ // alone now? if (!in->is_replicated()) { - mds->locker->simple_eval(&in->hardlock); + mds->locker->simple_eval(&in->authlock); + mds->locker->simple_eval(&in->linklock); + mds->locker->simple_eval(&in->dirfragtreelock); mds->locker->file_eval(&in->filelock); } } @@ -2569,7 +2576,7 @@ int MDCache::path_traverse(MDRequest *mdr, */ // must read directory hard data (permissions, x bit) to traverse - if (!noperm && !mds->locker->simple_rdlock_try(&cur->hardlock, ondelay)) { + if (!noperm && !mds->locker->simple_rdlock_try(&cur->authlock, ondelay)) { return 1; } diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index df6d0a364a645..fd8404b6fc184 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -764,10 +764,20 @@ void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, int new_au in->clear_replicas(); // twiddle lock states for auth -> replica transition - // hard - in->hardlock.clear_gather(); - if (in->hardlock.get_state() == LOCK_GLOCKR) - in->hardlock.set_state(LOCK_LOCK); + // auth + in->authlock.clear_gather(); + if 
(in->authlock.get_state() == LOCK_GLOCKR) + in->authlock.set_state(LOCK_LOCK); + + // link + in->linklock.clear_gather(); + if (in->linklock.get_state() == LOCK_GLOCKR) + in->linklock.set_state(LOCK_LOCK); + + // dirfragtree + in->dirfragtreelock.clear_gather(); + if (in->dirfragtreelock.get_state() == LOCK_GLOCKR) + in->dirfragtreelock.set_state(LOCK_LOCK); // file : we lost all our caps, so move to stable state! in->filelock.clear_gather(); @@ -1574,7 +1584,9 @@ void Migrator::import_reverse(CDir *dir, bool fix_dir_auth) in->clear_replicas(); if (in->is_dirty()) in->mark_clean(); - in->hardlock.clear_gather(); + in->authlock.clear_gather(); + in->linklock.clear_gather(); + in->dirfragtreelock.clear_gather(); in->filelock.clear_gather(); // non-bounding dir? @@ -1789,13 +1801,12 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist& bl, int& off, int ol in->remove_replica(mds->get_nodeid()); // twiddle locks - // hard - if (in->hardlock.get_state() == LOCK_GLOCKR) { - in->hardlock.remove_gather(mds->get_nodeid()); - in->hardlock.remove_gather(oldauth); - if (!in->hardlock.is_gathering()) - mds->locker->simple_eval(&in->hardlock); - } + if (in->authlock.do_import(oldauth, mds->get_nodeid())) + mds->locker->simple_eval(&in->authlock); + if (in->linklock.do_import(oldauth, mds->get_nodeid())) + mds->locker->simple_eval(&in->linklock); + if (in->dirfragtreelock.do_import(oldauth, mds->get_nodeid())) + mds->locker->simple_eval(&in->dirfragtreelock); // caps for (set::iterator it = merged_client_caps.begin(); @@ -1813,13 +1824,8 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist& bl, int& off, int ol } // filelock - if (!in->filelock.is_stable()) { - // take me and old auth out of gather set - in->filelock.remove_gather(mds->get_nodeid()); - in->filelock.remove_gather(oldauth); - if (!in->filelock.is_gathering()) // necessary but not suffient... 
- mds->locker->file_eval(&in->filelock); - } + if (in->filelock.do_import(oldauth, mds->get_nodeid())) + mds->locker->file_eval(&in->filelock); /* filelock keeps using file_eval, as in the code this replaces */ } diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index 5c8c40c07b521..d8fb638c47ada 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -859,7 +859,7 @@ void Server::handle_client_chmod(MDRequest *mdr) if (!cur) return; // write - if (!mds->locker->xlock_start(&cur->hardlock, mdr)) + if (!mds->locker->xlock_start(&cur->authlock, mdr)) return; mds->balancer->hit_inode(cur, META_POP_IWR); @@ -918,7 +918,7 @@ void Server::handle_client_chown(MDRequest *mdr) if (!cur) return; // write - if (!mds->locker->xlock_start(&cur->hardlock, mdr)) + if (!mds->locker->xlock_start(&cur->authlock, mdr)) return; mds->balancer->hit_inode(cur, META_POP_IWR); @@ -1285,7 +1285,7 @@ void Server::handle_client_link(MDRequest *mdr) xlocks.insert(&dn->lock); for (unsigned i=0; ilock); - xlocks.insert(&targeti->hardlock); + xlocks.insert(&targeti->linklock); if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks)) return; @@ -1531,7 +1531,7 @@ void Server::handle_client_unlink(MDRequest *mdr) for (unsigned i=0; ilock); xlocks.insert(&dn->lock); - xlocks.insert(&in->hardlock); + xlocks.insert(&in->linklock); if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks)) return; @@ -1925,7 +1925,7 @@ void Server::handle_client_rename(MDRequest *mdr) xlocks.insert(&destdn->lock); // xlock oldin - if (oldin) xlocks.insert(&oldin->hardlock); + if (oldin) xlocks.insert(&oldin->linklock); if (!mds->locker->acquire_locks(mdr, rdlocks, xlocks)) return; diff --git a/branches/sage/cephmds2/mds/SimpleLock.h b/branches/sage/cephmds2/mds/SimpleLock.h index 7221afccf4447..3062809428b9d 100644 --- a/branches/sage/cephmds2/mds/SimpleLock.h +++ b/branches/sage/cephmds2/mds/SimpleLock.h @@ -18,22 +18,21 @@ // -- lock types -- // NOTE: this also defines the lock ordering! 
#define LOCK_OTYPE_DN 1 + #define LOCK_OTYPE_IFILE 2 -#define LOCK_OTYPE_IHARD 3 // deprecate me? +#define LOCK_OTYPE_IAUTH 3 +#define LOCK_OTYPE_ILINK 4 +#define LOCK_OTYPE_IDIRFRAGTREE 5 -#define LOCK_OTYPE_IPERM 4 -#define LOCK_OTYPE_ILINK 5 -#define LOCK_OTYPE_IDIRTREE 6 -#define LOCK_OTYPE_DIR 7 +#define LOCK_OTYPE_DIR 7 // not used inline const char *get_lock_type_name(int t) { switch (t) { case LOCK_OTYPE_DN: return "dentry"; case LOCK_OTYPE_IFILE: return "inode_file"; - case LOCK_OTYPE_IHARD: return "inode_hard"; - case LOCK_OTYPE_IPERM: return "inode_perm"; + case LOCK_OTYPE_IAUTH: return "inode_auth"; case LOCK_OTYPE_ILINK: return "inode_link"; - case LOCK_OTYPE_IDIRTREE: return "inode_dirtree"; + case LOCK_OTYPE_IDIRFRAGTREE: return "inode_dirfragtree"; default: assert(0); } } @@ -147,6 +146,7 @@ public: } // ref counting + bool is_rdlocked() { return num_rdlock > 0; } int get_rdlock() { return ++num_rdlock; } int put_rdlock() { assert(num_rdlock>0); @@ -165,7 +165,7 @@ public: bool is_xlocked() { return xlock_by ? true:false; } MDRequest *get_xlocked_by() { return xlock_by; } bool is_used() { - return (is_xlocked() || (num_rdlock>0)) ? true:false; + return is_xlocked() || is_rdlocked(); } // encode/decode @@ -192,6 +192,32 @@ public: } return 0; } + /** replicate_relax + * called on first replica creation. 
+ */ + void replicate_relax() { + assert(parent->is_auth()); + assert(!parent->is_replicated()); + if (state == LOCK_LOCK && !is_used()) + state = LOCK_SYNC; + } + bool remove_replica(int from) { + if (is_gathering(from)) { + remove_gather(from); + if (!is_gathering()) + return true; + } + return false; + } + bool do_import(int from, int to) { + if (!is_stable()) { + remove_gather(from); + remove_gather(to); + if (!is_gathering()) + return true; + } + return false; + } bool can_rdlock(MDRequest *mdr) { if (state == LOCK_SYNC) diff --git a/branches/sage/cephmds2/messages/MClientReply.h b/branches/sage/cephmds2/messages/MClientReply.h index 068ab52dabb29..874cedbd8bb32 100644 --- a/branches/sage/cephmds2/messages/MClientReply.h +++ b/branches/sage/cephmds2/messages/MClientReply.h @@ -68,10 +68,9 @@ class InodeStat { { // inode.mask inode.mask = INODE_MASK_BASE; - if (in->filelock.can_rdlock(0)) - inode.mask |= INODE_MASK_PERM; - if (in->hardlock.can_rdlock(0)) - inode.mask |= INODE_MASK_SIZE | INODE_MASK_MTIME; // fixme when we separate this out. + if (in->authlock.can_rdlock(0)) inode.mask |= INODE_MASK_AUTH; + if (in->linklock.can_rdlock(0)) inode.mask |= INODE_MASK_LINK; + if (in->filelock.can_rdlock(0)) inode.mask |= INODE_MASK_FILE; // symlink content? 
if (in->is_symlink()) @@ -96,7 +95,7 @@ class InodeStat { } void _encode(bufferlist &bl) { - bl.append((char*)&inode, sizeof(inode)); + ::_encode(inode, bl); ::_encode(dirfrag_auth, bl); ::_encode(dirfrag_dist, bl); ::_encode(dirfrag_rep, bl); @@ -105,8 +104,7 @@ class InodeStat { } void _decode(bufferlist &bl, int& off) { - bl.copy(off, sizeof(inode), (char*)&inode); - off += sizeof(inode); + ::_decode(inode, bl, off); ::_decode(dirfrag_auth, bl, off); ::_decode(dirfrag_dist, bl, off); ::_decode(dirfrag_rep, bl, off); diff --git a/branches/sage/cephmds2/messages/MMDSCacheRejoinAck.h b/branches/sage/cephmds2/messages/MMDSCacheRejoinAck.h index 3b7fde460e78e..2fe60fbaf0702 100644 --- a/branches/sage/cephmds2/messages/MMDSCacheRejoinAck.h +++ b/branches/sage/cephmds2/messages/MMDSCacheRejoinAck.h @@ -24,11 +24,16 @@ class MMDSCacheRejoinAck : public Message { public: struct inodeinfo { inodeno_t ino; - int hardlock; + int authlock; + int linklock; + int dirfragtreelock; int filelock; int nonce; inodeinfo() {} - inodeinfo(inodeno_t i, int h, int f, int n) : ino(i), hardlock(h), filelock(f), nonce(n) {} + inodeinfo(inodeno_t i, int a, int l, int dft, int f, int n) : + ino(i), + authlock(a), linklock(l), dirfragtreelock(dft), filelock(f), + nonce(n) {} }; struct dninfo { int lock; @@ -56,8 +61,8 @@ class MMDSCacheRejoinAck : public Message { void add_dentry(dirfrag_t dirfrag, const string& dn, int ls, int nonce) { dentries[dirfrag][dn] = dninfo(ls, nonce); } - void add_inode(inodeno_t ino, int hl, int fl, int nonce) { - inodes.push_back(inodeinfo(ino, hl, fl, nonce)); + void add_inode(inodeno_t ino, int authl, int linkl, int dftl, int fl, int nonce) { + inodes.push_back(inodeinfo(ino, authl, linkl, dftl, fl, nonce)); } void encode_payload() { -- 2.39.5