From b1e8f0ecaa6cfa96bccfbdc8f53b802dbfb5bda7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 Mar 2008 12:45:41 -0700 Subject: [PATCH] mds: lease revocation works. cleaned up stat vs lock ids. --- src/Makefile.am | 1 + src/client/Client.cc | 10 +- src/include/ceph_fs.h | 50 +++++----- src/mds/CDentry.h | 8 +- src/mds/CInode.cc | 22 ++--- src/mds/CInode.h | 24 ++--- src/mds/CacheObject.cc | 83 +++++++++++++++++ src/mds/Locker.cc | 179 ++++++++++++++++++++++++++---------- src/mds/Locker.h | 2 + src/mds/MDCache.cc | 3 +- src/mds/Server.cc | 15 ++- src/mds/SimpleLock.h | 55 ++++++----- src/mds/mdstypes.h | 20 ++-- src/messages/MClientLock.h | 23 +++-- src/messages/MClientReply.h | 8 +- 15 files changed, 340 insertions(+), 163 deletions(-) create mode 100644 src/mds/CacheObject.cc diff --git a/src/Makefile.am b/src/Makefile.am index 4b6e98b634cf4..ece3192f069b7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -177,6 +177,7 @@ libmds_a_SOURCES = \ mds/journal.cc \ mds/Server.cc \ mds/MDCache.cc \ + mds/CacheObject.cc \ mds/Locker.cc \ mds/Migrator.cc \ mds/MDBalancer.cc \ diff --git a/src/client/Client.cc b/src/client/Client.cc index e3609408f7f81..c224b9f923f00 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1151,7 +1151,7 @@ void Client::handle_lock(MClientLock *m) } in = inode_map[m->ino]; - if (m->lock_type == LOCK_OTYPE_DN) { + if (m->lock_type == CEPH_LOCK_DN) { if (!in->dir || in->dir->dentries.count(m->dname) == 0) { dout(10) << " don't have dir|dentry " << m->ino << "/" << m->dname <dir->dentries[m->dname]; dout(10) << " reset ttl on " << dn << dendl; dn->ttl = utime_t(); + } else { + int newmask = in->mask & ~m->mask; + dout(10) << " reset inode " << in->ino() + << " mask " << in->mask << " -> " << newmask << dendl; + in->mask = newmask; } revoke: - messenger->send_message(new MClientLock(m->lock_type, m->action, m->ino, m->dname), + messenger->send_message(new MClientLock(m->lock_type, CEPH_MDS_LOCK_RELEASE, + m->mask, m->ino, 
m->dname), m->get_source_inst()); delete m; } diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 8b6585454c4b7..9ec2353b5ae16 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -267,8 +267,7 @@ struct ceph_msg_header { #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 #define CEPH_MSG_CLIENT_REPLY 26 #define CEPH_MSG_CLIENT_FILECAPS 0x310 - -#define CEPH_MSG_CLIENT_LOCK 28 +#define CEPH_MSG_CLIENT_LOCK 0x311 /* osd */ #define CEPH_MSG_OSD_GETMAP 40 @@ -309,26 +308,35 @@ struct ceph_statfs { /* - * metadata/stat validity masks + * metadata lock types. + * - these are bitmasks.. we can compose them + * - they also define the lock ordering by the MDS + * - a few of these are internal to the mds + */ +#define CEPH_LOCK_DN 1 +#define CEPH_LOCK_IVERSION 2 /* mds internal */ +#define CEPH_LOCK_IFILE 4 +#define CEPH_LOCK_IAUTH 8 +#define CEPH_LOCK_ILINK 16 +#define CEPH_LOCK_IDFT 32 /* dir frag tree */ +#define CEPH_LOCK_IDIR 64 /* mds internal */ +#define CEPH_LOCK_INO 128 /* immutable inode bits; not actually a lock */ + +/* + * stat masks are defined in terms of the locks that cover inode fields. 
*/ -#define CEPH_STAT_MASK_INODE 1 /* immutable inode bits */ -#define CEPH_STAT_MASK_AUTH 2 -#define CEPH_STAT_MASK_LINK 4 -#define CEPH_STAT_MASK_FILE 8 -#define CEPH_STAT_MASK_INODE_ALL 15 - -#define CEPH_STAT_MASK_DN 64 /* dentry */ - -#define CEPH_STAT_MASK_TYPE CEPH_STAT_MASK_INODE /* mode >> 12 */ -#define CEPH_STAT_MASK_SYMLINK CEPH_STAT_MASK_INODE -#define CEPH_STAT_MASK_LAYOUT CEPH_STAT_MASK_INODE -#define CEPH_STAT_MASK_UID CEPH_STAT_MASK_AUTH -#define CEPH_STAT_MASK_GID CEPH_STAT_MASK_AUTH -#define CEPH_STAT_MASK_MODE CEPH_STAT_MASK_AUTH -#define CEPH_STAT_MASK_NLINK CEPH_STAT_MASK_LINK -#define CEPH_STAT_MASK_MTIME CEPH_STAT_MASK_FILE -#define CEPH_STAT_MASK_SIZE CEPH_STAT_MASK_FILE -#define CEPH_STAT_MASK_ATIME CEPH_STAT_MASK_FILE /* fixme */ +#define CEPH_STAT_MASK_INODE CEPH_LOCK_INO +#define CEPH_STAT_MASK_TYPE CEPH_LOCK_INO /* mode >> 12 */ +#define CEPH_STAT_MASK_SYMLINK CEPH_LOCK_INO +#define CEPH_STAT_MASK_LAYOUT CEPH_LOCK_INO +#define CEPH_STAT_MASK_UID CEPH_LOCK_IAUTH +#define CEPH_STAT_MASK_GID CEPH_LOCK_IAUTH +#define CEPH_STAT_MASK_MODE CEPH_LOCK_IAUTH +#define CEPH_STAT_MASK_NLINK CEPH_LOCK_ILINK +#define CEPH_STAT_MASK_MTIME CEPH_LOCK_IFILE +#define CEPH_STAT_MASK_SIZE CEPH_LOCK_IFILE +#define CEPH_STAT_MASK_ATIME CEPH_LOCK_IFILE /* fixme */ +#define CEPH_STAT_MASK_INODE_ALL (CEPH_LOCK_IFILE|CEPH_LOCK_IAUTH|CEPH_LOCK_ILINK|CEPH_LOCK_INO) /* client_session */ diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index b99ad9ea603d5..f55ed50b0be11 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -118,7 +118,7 @@ public: xlist_dirty(this), dir_offset(0), auth_pins(0), nested_auth_pins(0), - lock(this, LOCK_OTYPE_DN, WAIT_LOCK_OFFSET) { } + lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { } CDentry(const string& n, CInode *in) : name(n), remote_ino(0), remote_d_type(0), @@ -127,7 +127,7 @@ public: xlist_dirty(this), dir_offset(0), auth_pins(0), nested_auth_pins(0), - lock(this, LOCK_OTYPE_DN, WAIT_LOCK_OFFSET) { } + lock(this, 
CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { } CDentry(const string& n, inodeno_t ino, unsigned char dt, CInode *in=0) : name(n), remote_ino(ino), remote_d_type(dt), @@ -136,7 +136,7 @@ public: xlist_dirty(this), dir_offset(0), auth_pins(0), nested_auth_pins(0), - lock(this, LOCK_OTYPE_DN, WAIT_LOCK_OFFSET) { } + lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { } CInode *get_inode() const { return inode; } CDir *get_dir() const { return dir; } @@ -252,7 +252,7 @@ public: // -- locking -- SimpleLock* get_lock(int type) { - assert(type == LOCK_OTYPE_DN); + assert(type == CEPH_LOCK_DN); return &lock; } void set_object_info(MDSCacheObjectInfo &info); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index f66009264660f..d4ff874ad48e4 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -480,20 +480,20 @@ void CInode::set_object_info(MDSCacheObjectInfo &info) void CInode::encode_lock_state(int type, bufferlist& bl) { switch (type) { - case LOCK_OTYPE_IAUTH: + case CEPH_LOCK_IAUTH: _encode(inode.ctime, bl); _encode(inode.mode, bl); _encode(inode.uid, bl); _encode(inode.gid, bl); break; - case LOCK_OTYPE_ILINK: + case CEPH_LOCK_ILINK: _encode(inode.ctime, bl); _encode(inode.nlink, bl); _encode(inode.anchored, bl); break; - case LOCK_OTYPE_IDIRFRAGTREE: + case CEPH_LOCK_IDFT: { // encode the raw tree dirfragtree._encode(bl); @@ -511,13 +511,13 @@ void CInode::encode_lock_state(int type, bufferlist& bl) } break; - case LOCK_OTYPE_IFILE: + case CEPH_LOCK_IFILE: _encode(inode.size, bl); _encode(inode.mtime, bl); _encode(inode.atime, bl); break; - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDIR: _encode(inode.mtime, bl); if (0) { map frag_sizes; @@ -543,7 +543,7 @@ void CInode::decode_lock_state(int type, bufferlist& bl) utime_t tm; switch (type) { - case LOCK_OTYPE_IAUTH: + case CEPH_LOCK_IAUTH: _decode(tm, bl, off); if (inode.ctime < tm) inode.ctime = tm; _decode(inode.mode, bl, off); @@ -551,14 +551,14 @@ void CInode::decode_lock_state(int type, bufferlist& bl) _decode(inode.gid, bl, 
off); break; - case LOCK_OTYPE_ILINK: + case CEPH_LOCK_ILINK: _decode(tm, bl, off); if (inode.ctime < tm) inode.ctime = tm; _decode(inode.nlink, bl, off); _decode(inode.anchored, bl, off); break; - case LOCK_OTYPE_IDIRFRAGTREE: + case CEPH_LOCK_IDFT: { fragtree_t temp; temp._decode(bl, off); @@ -575,13 +575,13 @@ void CInode::decode_lock_state(int type, bufferlist& bl) } break; - case LOCK_OTYPE_IFILE: + case CEPH_LOCK_IFILE: _decode(inode.size, bl, off); _decode(inode.mtime, bl, off); _decode(inode.atime, bl, off); break; - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDIR: //::_decode(inode.size, bl, off); _decode(tm, bl, off); if (inode.mtime < tm) { @@ -609,7 +609,7 @@ void CInode::clear_dirty_scattered(int type) { dout(10) << "clear_dirty_scattered " << type << " on " << *this << dendl; switch (type) { - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDIR: xlist_dirty_inode_mtime.remove_myself(); break; default: diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 45366efdf1110..a654144ff6968 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -243,12 +243,12 @@ public: xlist_dirty(this), xlist_open_file(this), xlist_dirty_inode_mtime(this), xlist_purging_inode(this), auth_pins(0), nested_auth_pins(0), - versionlock(this, LOCK_OTYPE_IVERSION, WAIT_VERSIONLOCK_OFFSET), - authlock(this, LOCK_OTYPE_IAUTH, WAIT_AUTHLOCK_OFFSET), - linklock(this, LOCK_OTYPE_ILINK, WAIT_LINKLOCK_OFFSET), - dirfragtreelock(this, LOCK_OTYPE_IDIRFRAGTREE, WAIT_DIRFRAGTREELOCK_OFFSET), - filelock(this, LOCK_OTYPE_IFILE, WAIT_FILELOCK_OFFSET), - dirlock(this, LOCK_OTYPE_IDIR, WAIT_DIRLOCK_OFFSET) + versionlock(this, CEPH_LOCK_IVERSION, WAIT_VERSIONLOCK_OFFSET), + authlock(this, CEPH_LOCK_IAUTH, WAIT_AUTHLOCK_OFFSET), + linklock(this, CEPH_LOCK_ILINK, WAIT_LINKLOCK_OFFSET), + dirfragtreelock(this, CEPH_LOCK_IDFT, WAIT_DIRFRAGTREELOCK_OFFSET), + filelock(this, CEPH_LOCK_IFILE, WAIT_FILELOCK_OFFSET), + dirlock(this, CEPH_LOCK_IDIR, WAIT_DIRLOCK_OFFSET) { state = 0; if (auth) state_set(STATE_AUTH); 
@@ -337,13 +337,13 @@ public: SimpleLock* get_lock(int type) { switch (type) { - case LOCK_OTYPE_IFILE: return &filelock; - case LOCK_OTYPE_IAUTH: return &authlock; - case LOCK_OTYPE_ILINK: return &linklock; - case LOCK_OTYPE_IDIRFRAGTREE: return &dirfragtreelock; - case LOCK_OTYPE_IDIR: return &dirlock; - default: assert(0); return 0; + case CEPH_LOCK_IFILE: return &filelock; + case CEPH_LOCK_IAUTH: return &authlock; + case CEPH_LOCK_ILINK: return &linklock; + case CEPH_LOCK_IDFT: return &dirfragtreelock; + case CEPH_LOCK_IDIR: return &dirlock; } + return 0; } void set_object_info(MDSCacheObjectInfo &info); void encode_lock_state(int type, bufferlist& bl); diff --git a/src/mds/CacheObject.cc b/src/mds/CacheObject.cc new file mode 100644 index 0000000000000..2e32cbf485815 --- /dev/null +++ b/src/mds/CacheObject.cc @@ -0,0 +1,83 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + + +#include "mdstypes.h" +#include "SimpleLock.h" + +#include "config.h" +#define dout(l) if (l<=g_conf.debug || l <= g_conf.debug_mds) *_dout << dbeginl << g_clock.now() << " " << this << " " +#define derr(l) if (l<=g_conf.debug || l <= g_conf.debug_mds) *_derr << dbeginl << g_clock.now() << " " << this << " " + +ClientReplica *MDSCacheObject::add_client_replica(int c, int mask) +{ + ClientReplica *r; + if (client_replica_map.count(c)) + r = client_replica_map[c]; + else { + if (client_replica_map.empty()) + get(PIN_CLIENTREPLICA); + r = client_replica_map[c] = new ClientReplica(c, this); + } + + int adding = ~r->mask & mask; + dout(10) << " had " << r->mask << " adding " << mask << " -> new " << adding << dendl; + int b = 0; + while (adding) { + if (adding & 1) { + SimpleLock *lock = get_lock(1 << b); + if (lock) { + lock->get_client_lease(); + dout(10) << "get_client_lease on " << (1 << b) << " " << *lock << dendl; + } + } + b++; + adding = adding >> 1; + } + r->mask |= mask; + + return r; +} + +int MDSCacheObject::remove_client_replica(ClientReplica *r, int mask) +{ + assert(r->parent == this); + + int removing = r->mask & mask; + dout(10) << "had " << r->mask << " removing " << mask << " -> " << removing << dendl; + int b = 0; + while (removing) { + if (removing & 1) { + SimpleLock *lock = get_lock(1 << b); + if (lock) { + lock->put_client_lease(); + dout(10) << "put_client_lease on " << (1 << b) << " " << *lock << dendl; + } + } + b++; + removing = removing >> 1; + } + + r->mask &= ~mask; + if (r->mask) + return r->mask; + + // remove! 
+ client_replica_map.erase(r->client); + delete r; + if (client_replica_map.empty()) + put(PIN_CLIENTREPLICA); + return 0; +} + diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 212671d390de7..a0108ccf13ce5 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -62,22 +62,23 @@ void Locker::dispatch(Message *m) switch (m->get_type()) { - // locking + // inter-mds locking case MSG_MDS_LOCK: handle_lock((MLock*)m); break; - - // cache fun + // inter-mds caps case MSG_MDS_INODEFILECAPS: handle_inode_file_caps((MInodeFileCaps*)m); break; + // client sync case CEPH_MSG_CLIENT_FILECAPS: handle_client_file_caps((MClientFileCaps*)m); break; - + case CEPH_MSG_CLIENT_LOCK: + handle_client_lock((MClientLock*)m); + break; - default: assert(0); } @@ -139,7 +140,7 @@ bool Locker::acquire_locks(MDRequest *mdr, sorted.insert(*p); // augment xlock with a versionlock? - if ((*p)->get_type() > LOCK_OTYPE_IVERSION) { + if ((*p)->get_type() > CEPH_LOCK_IVERSION) { // inode version lock? CInode *in = (CInode*)(*p)->get_parent(); if (mdr->is_master()) { @@ -159,7 +160,7 @@ bool Locker::acquire_locks(MDRequest *mdr, sorted.insert(*p); if ((*p)->get_parent()->is_auth()) mustpin.insert(*p); - else if ((*p)->get_type() == LOCK_OTYPE_IDIR && + else if ((*p)->get_type() == CEPH_LOCK_IDIR && !(*p)->get_parent()->is_auth() && !((ScatterLock*)(*p))->can_wrlock()) { // we might have to request a scatter dout(15) << " will also auth_pin " << *(*p)->get_parent() << " in case we need to request a scatter" << dendl; mustpin.insert(*p); @@ -335,13 +336,26 @@ void Locker::drop_locks(MDRequest *mdr) // generics +void Locker::eval_gather(SimpleLock *lock) +{ + switch (lock->get_type()) { + case CEPH_LOCK_IFILE: + return file_eval_gather((FileLock*)lock); + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: + return scatter_eval_gather((ScatterLock*)lock); + default: + return simple_eval_gather(lock); + } +} + bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { - 
case LOCK_OTYPE_IFILE: + case CEPH_LOCK_IFILE: return file_rdlock_start((FileLock*)lock, mdr); - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: return scatter_rdlock_start((ScatterLock*)lock, mdr); default: return simple_rdlock_start(lock, mdr); @@ -351,10 +365,10 @@ bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mdr) void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { - case LOCK_OTYPE_IFILE: + case CEPH_LOCK_IFILE: return file_rdlock_finish((FileLock*)lock, mdr); - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: return scatter_rdlock_finish((ScatterLock*)lock, mdr); default: return simple_rdlock_finish(lock, mdr); @@ -364,10 +378,10 @@ void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr) bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: return scatter_wrlock_start((ScatterLock*)lock, mdr); - case LOCK_OTYPE_IVERSION: + case CEPH_LOCK_IVERSION: return local_wrlock_start((LocalLock*)lock, mdr); default: assert(0); @@ -378,10 +392,10 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mdr) void Locker::wrlock_finish(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: return scatter_wrlock_finish((ScatterLock*)lock, mdr); - case LOCK_OTYPE_IVERSION: + case CEPH_LOCK_IVERSION: return local_wrlock_finish((LocalLock*)lock, mdr); default: assert(0); @@ -391,12 +405,12 @@ void Locker::wrlock_finish(SimpleLock *lock, MDRequest *mdr) bool Locker::xlock_start(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { - case LOCK_OTYPE_IFILE: + case CEPH_LOCK_IFILE: return file_xlock_start((FileLock*)lock, mdr); - case LOCK_OTYPE_IVERSION: + case 
CEPH_LOCK_IVERSION: return local_xlock_start((LocalLock*)lock, mdr); - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: assert(0); default: return simple_xlock_start(lock, mdr); @@ -406,12 +420,12 @@ bool Locker::xlock_start(SimpleLock *lock, MDRequest *mdr) void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { - case LOCK_OTYPE_IFILE: + case CEPH_LOCK_IFILE: return file_xlock_finish((FileLock*)lock, mdr); - case LOCK_OTYPE_IVERSION: + case CEPH_LOCK_IVERSION: return local_xlock_finish((LocalLock*)lock, mdr); - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: assert(0); default: return simple_xlock_finish(lock, mdr); @@ -936,7 +950,68 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) } +void Locker::handle_client_lock(MClientLock *m) +{ + dout(10) << "handle_client_lock " << *m << dendl; + + assert(m->get_source().is_client()); + int client = m->get_source().num(); + + CInode *in = mdcache->get_inode(m->ino); + if (!in) { + dout(7) << "handle_client_lock don't have ino " << m->ino << dendl; + delete m; + return; + } + CDentry *dn = 0; + MDSCacheObject *p = 0; /* stays null when the dirfrag is not in cache; the !dn check below returns before p is used */ + if (m->lock_type == CEPH_LOCK_DN) { + frag_t fg = in->pick_dirfrag(m->dname); + CDir *dir = in->get_dirfrag(fg); + if (dir) + p = dn = dir->lookup(m->dname); + if (!dn) { + dout(7) << "handle_client_lock don't have dn " << m->ino << " " << m->dname << dendl; + delete m; + return; + } + } else { + p = in; + } + dout(10) << " on " << *p << dendl; + + // replica and lock + SimpleLock *lock = p->get_lock(m->lock_type); + assert(lock); + ClientReplica *r = p->get_client_replica(client); /* look up on the locked object itself (dentry or inode): remove_client_replica() asserts r->parent == p, so a replica taken from the directory inode is wrong for dentry leases */ + if (!r) { + dout(7) << "handle_client_lock didn't have replica for client" << client << " of " << *p << dendl; + delete m; + return; + } + + switch (m->action) { + case CEPH_MDS_LOCK_RELEASE: + { + dout(7) << "handle_client_lock client" << client + << " release mask " << m->mask + << " 
on " << *p << dendl; + int left = p->remove_client_replica(r, r->mask); + dout(10) << " remaining mask is " << left << " on " << *p << dendl; + } + break; + case CEPH_MDS_LOCK_RENEW: + assert(0); // implement me + break; + } + + // eval/waiters + if (!lock->is_stable()) + eval_gather(lock); + + delete m; +} @@ -949,7 +1024,7 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) SimpleLock *Locker::get_lock(int lock_type, MDSCacheObjectInfo &info) { switch (lock_type) { - case LOCK_OTYPE_DN: + case CEPH_LOCK_DN: { // be careful; info.dirfrag may have incorrect frag; recalculate based on dname. CInode *diri = mdcache->get_inode(info.dirfrag.ino); @@ -969,11 +1044,11 @@ SimpleLock *Locker::get_lock(int lock_type, MDSCacheObjectInfo &info) return &dn->lock; } - case LOCK_OTYPE_IAUTH: - case LOCK_OTYPE_ILINK: - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IFILE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IAUTH: + case CEPH_LOCK_ILINK: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IFILE: + case CEPH_LOCK_IDIR: { CInode *in = mdcache->get_inode(info.ino); if (!in) { @@ -981,11 +1056,11 @@ SimpleLock *Locker::get_lock(int lock_type, MDSCacheObjectInfo &info) return 0; } switch (lock_type) { - case LOCK_OTYPE_IAUTH: return &in->authlock; - case LOCK_OTYPE_ILINK: return &in->linklock; - case LOCK_OTYPE_IDIRFRAGTREE: return &in->dirfragtreelock; - case LOCK_OTYPE_IFILE: return &in->filelock; - case LOCK_OTYPE_IDIR: return &in->dirlock; + case CEPH_LOCK_IAUTH: return &in->authlock; + case CEPH_LOCK_ILINK: return &in->linklock; + case CEPH_LOCK_IDFT: return &in->dirfragtreelock; + case CEPH_LOCK_IFILE: return &in->filelock; + case CEPH_LOCK_IDIR: return &in->dirlock; } } @@ -1012,18 +1087,18 @@ void Locker::handle_lock(MLock *m) } switch (lock->get_type()) { - case LOCK_OTYPE_DN: - case LOCK_OTYPE_IAUTH: - case LOCK_OTYPE_ILINK: + case CEPH_LOCK_DN: + case CEPH_LOCK_IAUTH: + case CEPH_LOCK_ILINK: handle_simple_lock(lock, m); break; - case LOCK_OTYPE_IFILE: + case 
CEPH_LOCK_IFILE: handle_file_lock((FileLock*)lock, m); break; - case LOCK_OTYPE_IDIRFRAGTREE: - case LOCK_OTYPE_IDIR: + case CEPH_LOCK_IDFT: + case CEPH_LOCK_IDIR: handle_scatter_lock((ScatterLock*)lock, m); break; @@ -1063,7 +1138,7 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m) lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE); // special case: trim replica no-longer-null dentry? - if (lock->get_type() == LOCK_OTYPE_DN) { + if (lock->get_type() == CEPH_LOCK_DN) { CDentry *dn = (CDentry*)lock->get_parent(); if (dn->is_null() && m->get_data().length() > 0) { dout(10) << "handle_simple_lock replica dentry null -> non-null, must trim " @@ -1161,6 +1236,7 @@ void Locker::simple_eval_gather(SimpleLock *lock) // finished gathering? if (lock->get_state() == LOCK_GLOCKR && !lock->is_gathering() && + lock->get_num_client_lease() == 0 && !lock->is_rdlocked()) { dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << dendl; @@ -1246,19 +1322,26 @@ void Locker::simple_lock(SimpleLock *lock) assert(lock->get_state() == LOCK_SYNC); if (lock->get_parent()->is_replicated() || - lock->get_parent()->is_client_replicated()) { + lock->get_num_client_lease()) { // bcast to mds replicas send_lock_message(lock, LOCK_AC_LOCK); // bcast to client replicas + int n = 0; for (hash_map::iterator p = lock->get_parent()->client_replica_map.begin(); p != lock->get_parent()->client_replica_map.end(); p++) { ClientReplica *r = p->second; - if (lock->get_type() == LOCK_OTYPE_DN) { + + if ((r->mask & lock->get_type()) == 0) /* parenthesized: == binds tighter than &, so the unparenthesized test was always false and replicas without a lease on this lock were revoked and counted in n */ + continue; + + n++; + if (lock->get_type() == CEPH_LOCK_DN) { CDentry *dn = (CDentry*)lock->get_parent(); mds->send_message_client(new MClientLock(lock->get_type(), CEPH_MDS_LOCK_REVOKE, + lock->get_type(), dn->get_dir()->ino(), dn->get_name()), r->client); @@ -1266,10 +1349,12 @@ void Locker::simple_lock(SimpleLock *lock) CInode *in = (CInode*)lock->get_parent(); mds->send_message_client(new 
MClientLock(lock->get_type(), CEPH_MDS_LOCK_REVOKE, + lock->get_type(), in->ino()), r->client); } } + assert(n == lock->get_num_client_lease()); // change lock lock->set_state(LOCK_GLOCKR); diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 7e0fcde9dc4e4..1b323b615e454 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -78,6 +78,7 @@ public: void drop_locks(MDRequest *mdr); protected: + void eval_gather(SimpleLock *lock); bool rdlock_start(SimpleLock *lock, MDRequest *mdr); void rdlock_finish(SimpleLock *lock, MDRequest *mdr); bool xlock_start(SimpleLock *lock, MDRequest *mdr); @@ -189,6 +190,7 @@ protected: protected: void handle_client_file_caps(class MClientFileCaps *m); + void handle_client_lock(class MClientLock *m); void request_inode_file_caps(CInode *in); void handle_inode_file_caps(class MInodeFileCaps *m); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 55b0443f5c322..c05ad1f50b772 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -88,7 +88,6 @@ using namespace std; - MDCache::MDCache(MDS *m) { mds = m; @@ -3504,7 +3503,7 @@ void MDCache::trim_client_replicas() if (r->ttl > now) break; MDSCacheObject *p = r->parent; dout(10) << " expiring client" << r->client << " replica of " << *p << dendl; - p->remove_client_replica(r); + p->remove_client_replica(r, r->mask); } dout(10) << "trim_client_replicas finish - " << client_replicas.size() << " replicas" << dendl; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 433da0f92953b..c8b49450fdf17 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -229,7 +229,7 @@ void Server::_session_logged(Session *session, bool open, version_t pv) ClientReplica *r = session->replicas.front(); MDSCacheObject *p = r->parent; dout(10) << " killing client replica of " << *p << dendl; - p->remove_client_replica(r); + p->remove_client_replica(r, r->mask); } if (session->is_closing()) @@ -577,8 +577,7 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in) // inode 
int mask = InodeStat::_encode(bl, in); if (mask) { - r = in->get_client_replica(client); - r->mask |= mask; + r = in->add_client_replica(client, mask); session->touch_replica(r); mdcache->touch_client_replica(r, ttl); } @@ -591,8 +590,8 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in) char dmask = 0; ::_encode_simple(dn->get_name(), bl); if (dn->lock.can_rdlock(0)) { - r = dn->get_client_replica(client); - dmask = r->mask = CEPH_STAT_MASK_DN; + dmask = CEPH_LOCK_DN; + r = dn->add_client_replica(client, dmask); session->touch_replica(r); mdcache->touch_client_replica(r, ttl); } @@ -1497,10 +1496,10 @@ void Server::handle_client_stat(MDRequest *mdr) set xlocks = mdr->xlocks; int mask = req->head.args.stat.mask; - if (mask & CEPH_STAT_MASK_LINK) rdlocks.insert(&ref->linklock); - if (mask & CEPH_STAT_MASK_AUTH) rdlocks.insert(&ref->authlock); + if (mask & CEPH_LOCK_ILINK) rdlocks.insert(&ref->linklock); + if (mask & CEPH_LOCK_IAUTH) rdlocks.insert(&ref->authlock); if (ref->is_file() && - mask & CEPH_STAT_MASK_FILE) rdlocks.insert(&ref->filelock); + mask & CEPH_LOCK_IFILE) rdlocks.insert(&ref->filelock); if (ref->is_dir() && mask & CEPH_STAT_MASK_MTIME) rdlocks.insert(&ref->dirlock); diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h index 720247bc55a84..8d2dd743c039c 100644 --- a/src/mds/SimpleLock.h +++ b/src/mds/SimpleLock.h @@ -17,27 +17,18 @@ #define __SIMPLELOCK_H // -- lock types -- -// NOTE: this also defines the lock ordering! 
-#define LOCK_OTYPE_DN 1 - -#define LOCK_OTYPE_IVERSION 2 -#define LOCK_OTYPE_IFILE 3 -#define LOCK_OTYPE_IAUTH 4 -#define LOCK_OTYPE_ILINK 5 -#define LOCK_OTYPE_IDIRFRAGTREE 6 -#define LOCK_OTYPE_IDIR 7 - -//#define LOCK_OTYPE_DIR 7 // not used +// see CEPH_LOCK_* inline const char *get_lock_type_name(int t) { switch (t) { - case LOCK_OTYPE_DN: return "dn"; - case LOCK_OTYPE_IVERSION: return "iversion"; - case LOCK_OTYPE_IFILE: return "ifile"; - case LOCK_OTYPE_IAUTH: return "iauth"; - case LOCK_OTYPE_ILINK: return "ilink"; - case LOCK_OTYPE_IDIRFRAGTREE: return "idft"; - case LOCK_OTYPE_IDIR: return "idir"; + case CEPH_LOCK_DN: return "dn"; + case CEPH_LOCK_IVERSION: return "iversion"; + case CEPH_LOCK_IFILE: return "ifile"; + case CEPH_LOCK_IAUTH: return "iauth"; + case CEPH_LOCK_ILINK: return "ilink"; + case CEPH_LOCK_IDFT: return "idft"; + case CEPH_LOCK_IDIR: return "idir"; + case CEPH_LOCK_INO: return "ino"; default: assert(0); return 0; } } @@ -101,6 +92,7 @@ protected: // lock state int state; set gather_set; // auth+rep. 
>= 0 is mds, < 0 is client + int num_client_lease; // local state int num_rdlock; @@ -110,7 +102,7 @@ protected: public: SimpleLock(MDSCacheObject *o, int t, int wo) : parent(o), type(t), wait_offset(wo), - state(LOCK_SYNC), + state(LOCK_SYNC), num_client_lease(0), num_rdlock(0), xlock_by(0) { } virtual ~SimpleLock() {} @@ -121,8 +113,8 @@ public: struct ptr_lt { bool operator()(const SimpleLock* l, const SimpleLock* r) const { // first sort by object type (dn < inode) - if ((l->type>LOCK_OTYPE_DN) < (r->type>LOCK_OTYPE_DN)) return true; - if ((l->type>LOCK_OTYPE_DN) == (r->type>LOCK_OTYPE_DN)) { + if ((l->type>CEPH_LOCK_DN) < (r->type>CEPH_LOCK_DN)) return true; + if ((l->type>CEPH_LOCK_DN) == (r->type>CEPH_LOCK_DN)) { // then sort by object if (l->parent->is_lt(r->parent)) return true; if (l->parent == r->parent) { @@ -174,10 +166,6 @@ public: p != parent->replicas_end(); ++p) gather_set.insert(p->first); - for (hash_map::const_iterator p = parent->client_replica_map.begin(); - p != parent->client_replica_map.end(); - p++) - gather_set.insert(-1 - p->second->client); } bool is_gathering() { return !gather_set.empty(); } bool is_gathering(int i) { @@ -221,7 +209,18 @@ public: MDRequest *get_xlocked_by() { return xlock_by; } bool is_used() { - return is_xlocked() || is_rdlocked() || !parent->client_replica_map.empty(); + return is_xlocked() || is_rdlocked() || num_client_lease; + } + + void get_client_lease() { + num_client_lease++; + } + void put_client_lease() { + assert(num_client_lease > 0); + num_client_lease--; + } + int get_num_client_lease() { + return num_client_lease; } // encode/decode @@ -307,8 +306,8 @@ public: out << get_lock_type_name(get_type()) << " "; out << get_simplelock_state_name(get_state()); if (!get_gather_set().empty()) out << " g=" << get_gather_set(); - if (!parent->client_replica_map.empty()) - out << " c=" << parent->client_replica_map.size(); + if (num_client_lease) + out << " c=" << num_client_lease; if (is_rdlocked()) out << " r=" 
<< get_num_rdlocks(); if (is_xlocked()) diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 9729b8f475237..063782a69fea8 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -616,26 +616,18 @@ protected: // replicas (on clients) public: hash_map client_replica_map; - + ClientReplica *get_client_replica(int c) { if (client_replica_map.count(c)) return client_replica_map[c]; - else { - if (client_replica_map.empty()) - get(PIN_CLIENTREPLICA); - return client_replica_map[c] = new ClientReplica(c, this); - } + return 0; } - bool is_client_replicated() { + bool is_client_replicated__() { return !client_replica_map.empty(); } - void remove_client_replica(ClientReplica *r) { - assert(r->parent == this); - client_replica_map.erase(r->client); - delete r; - if (client_replica_map.empty()) - put(PIN_CLIENTREPLICA); - } + + ClientReplica *add_client_replica(int c, int mask); + int remove_client_replica(ClientReplica *r, int mask); // returns remaining mask (if any) // --------------------------------------------- diff --git a/src/messages/MClientLock.h b/src/messages/MClientLock.h index e93c6c7d25a0b..bf3269537eeca 100644 --- a/src/messages/MClientLock.h +++ b/src/messages/MClientLock.h @@ -32,31 +32,32 @@ static const char *get_clientlock_action_name(int a) { struct MClientLock : public Message { __u8 lock_type; __u8 action; + __u16 mask; __u64 ino; string dname; - MClientLock() : Message(CEPH_MSG_CLIENT_LOCK) {} - MClientLock(int l, int ac, __u64 i) : + MClientLock(int l, int ac, int m, __u64 i) : Message(CEPH_MSG_CLIENT_LOCK), - lock_type(l), action(ac), ino(i) {} - MClientLock(int l, int ac, __u64 i, const string& d) : + lock_type(l), action(ac), mask(m), ino(i) {} + MClientLock(int l, int ac, int m, __u64 i, const string& d) : Message(CEPH_MSG_CLIENT_LOCK), - lock_type(l), action(ac), ino(i), dname(d) {} - MClientLock(SimpleLock *lock, int ac, __u64 i) : + lock_type(l), action(ac), mask(m), ino(i), dname(d) {} + MClientLock(SimpleLock *lock, int ac, int m, 
__u64 i) : Message(CEPH_MSG_CLIENT_LOCK), lock_type(lock->get_type()), - action(ac), ino(i) {} - MClientLock(SimpleLock *lock, int ac, __u64 i, const string& d) : + action(ac), mask(m), ino(i) {} + MClientLock(SimpleLock *lock, int ac, int m, __u64 i, const string& d) : Message(CEPH_MSG_CLIENT_LOCK), lock_type(lock->get_type()), - action(ac), ino(i), dname(d) {} + action(ac), mask(m), ino(i), dname(d) {} const char *get_type_name() { return "client_lock"; } void print(ostream& out) { out << "client_lock(a=" << get_clientlock_action_name(action) << " " << get_lock_type_name(lock_type) - << " " << ino; + << " mask " << mask; + out << " " << inodeno_t(ino); if (dname.length()) out << "/" << dname; out << ")"; @@ -65,12 +66,14 @@ struct MClientLock : public Message { void decode_payload() { int off = 0; ::_decode(lock_type, payload, off); + ::_decode(mask, payload, off); ::_decode(action, payload, off); ::_decode(ino, payload, off); ::_decode(dname, payload, off); } virtual void encode_payload() { ::_encode(lock_type, payload); + ::_encode(mask, payload); ::_encode(action, payload); ::_encode(ino, payload); ::_encode(dname, payload); diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index 8a6f7171f10b2..69b1682917139 100644 --- a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -132,10 +132,10 @@ struct InodeStat { static int _encode(bufferlist &bl, CInode *in) { // mask - int mask = CEPH_STAT_MASK_INODE; - if (in->authlock.can_rdlock(0)) mask |= CEPH_STAT_MASK_AUTH; - if (in->linklock.can_rdlock(0)) mask |= CEPH_STAT_MASK_LINK; - if (in->filelock.can_rdlock(0)) mask |= CEPH_STAT_MASK_FILE; + int mask = CEPH_LOCK_INO; + if (in->authlock.can_rdlock(0)) mask |= CEPH_LOCK_IAUTH; + if (in->linklock.can_rdlock(0)) mask |= CEPH_LOCK_ILINK; + if (in->filelock.can_rdlock(0)) mask |= CEPH_LOCK_IFILE; /* * note: encoding matches struct ceph_client_reply_inode -- 2.39.5