From 7f46121db46a10764ee85b2a917d7e6e65504d2a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 2 Jan 2009 12:05:09 -0800 Subject: [PATCH] mds: share caps across multiple lock types. Very rough initial first pass. --- src/include/ceph_fs.h | 72 +++++++++--- src/mds/CDentry.h | 4 +- src/mds/CInode.cc | 4 +- src/mds/CInode.h | 54 ++++++--- src/mds/Capability.h | 28 +---- src/mds/FileLock.h | 123 ++++++++++---------- src/mds/LocalLock.h | 4 +- src/mds/Locker.cc | 224 +++++++++++++++++-------------------- src/mds/MDCache.cc | 12 +- src/mds/ScatterLock.h | 4 +- src/mds/Server.cc | 6 +- src/mds/SimpleLock.h | 79 +++++++++++-- src/mds/mdstypes.h | 46 ++++---- src/messages/MClientCaps.h | 4 +- 14 files changed, 376 insertions(+), 288 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 1ceaebff1dfef..9f32bd7162463 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -898,15 +898,31 @@ static inline int ceph_flags_to_mode(int flags) return CEPH_FILE_MODE_RD; } -/* client file caps */ -#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ -#define CEPH_CAP_RDCACHE 2 /* client can cache reads */ -#define CEPH_CAP_RD 4 /* client can read */ -#define CEPH_CAP_WR 8 /* client can write */ -#define CEPH_CAP_WRBUFFER 16 /* client can buffer writes */ -#define CEPH_CAP_WREXTEND 32 /* client can extend EOF */ -#define CEPH_CAP_LAZYIO 64 /* client can perform lazy io */ -#define CEPH_CAP_EXCL 128 /* exclusive/loner access */ + +/* capability bits */ +#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ + +/* generic cap bits */ +#define CEPH_CAP_GRDCACHE 1 /* client can cache reads */ +#define CEPH_CAP_GEXCL 2 /* exclusive/loner access */ +#define CEPH_CAP_GRD 4 /* client can read */ +#define CEPH_CAP_GWR 8 /* client can write */ +#define CEPH_CAP_GWRBUFFER 16 /* client can buffer writes */ +#define CEPH_CAP_GWREXTEND 32 /* client can extend EOF */ +#define CEPH_CAP_GLAZYIO 64 /* client can perform lazy io */ + +/* 
per-lock shift */ +#define CEPH_CAP_SAUTH 2 +#define CEPH_CAP_SLINK 4 +#define CEPH_CAP_SXATTR 6 +#define CEPH_CAP_SFILE 8 + +#define CEPH_CAP_ANY_EXCL ((CEPH_CAP_GEXCL << CEPH_CAP_SAUTH) | \ + (CEPH_CAP_GEXCL << CEPH_CAP_SLINK) | \ + (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR) | \ + (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)) +#define CEPH_CAP_ANY_FILE_WR ((CEPH_CAP_GWR|CEPH_CAP_GWRBUFFER) << CEPH_CAP_SFILE) +#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) static inline int ceph_caps_for_mode(int mode) { @@ -915,16 +931,25 @@ static inline int ceph_caps_for_mode(int mode) return CEPH_CAP_PIN; case CEPH_FILE_MODE_RD: return CEPH_CAP_PIN | - CEPH_CAP_RD | CEPH_CAP_RDCACHE; + ((CEPH_CAP_GRD | CEPH_CAP_GRDCACHE) << CEPH_CAP_SFILE) | + ((CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL) << CEPH_CAP_SAUTH) | + ((CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL) << CEPH_CAP_SXATTR) | + ((CEPH_CAP_GRDCACHE) << CEPH_CAP_SLINK); case CEPH_FILE_MODE_RDWR: return CEPH_CAP_PIN | - CEPH_CAP_RD | CEPH_CAP_RDCACHE | - CEPH_CAP_WR | CEPH_CAP_WRBUFFER | - CEPH_CAP_EXCL; + ((CEPH_CAP_GRD | CEPH_CAP_GRDCACHE | + CEPH_CAP_GWR | CEPH_CAP_GWRBUFFER | + CEPH_CAP_GEXCL) << CEPH_CAP_SFILE) | + ((CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL) << CEPH_CAP_SAUTH) | + ((CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL) << CEPH_CAP_SXATTR) | + ((CEPH_CAP_GRDCACHE) << CEPH_CAP_SLINK); case CEPH_FILE_MODE_WR: return CEPH_CAP_PIN | - CEPH_CAP_WR | CEPH_CAP_WRBUFFER | - CEPH_CAP_EXCL; + ((CEPH_CAP_GWR | CEPH_CAP_GWRBUFFER | + CEPH_CAP_GEXCL) << CEPH_CAP_SFILE) | + ((CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL) << CEPH_CAP_SAUTH) | + ((CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL) << CEPH_CAP_SXATTR) | + ((CEPH_CAP_GRDCACHE) << CEPH_CAP_SLINK); } return 0; } @@ -967,14 +992,25 @@ struct ceph_mds_caps { __le64 ino; __le32 seq; __le32 caps, wanted; + __le32 migrate_seq; + __le64 snap_follows; + __le32 snap_trace_len; + + /* authlock */ + __le32 uid, gid, mode; + + /* linklock */ + __le32 nlink; + + /* xattrlock */ + __le32 xattr_len; + + /* filelock */ __le64 size, 
max_size; __le64 truncate_seq; - __le32 migrate_seq; struct ceph_timespec mtime, atime, ctime; struct ceph_file_layout layout; __le64 time_warp_seq; - __le64 snap_follows; - __le32 snap_trace_len; } __attribute__ ((packed)); diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index e5576de587149..24d504d6a822c 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -130,7 +130,7 @@ public: version(0), projected_version(0), xlist_dirty(this), auth_pins(0), nested_auth_pins(0), nested_anchors(0), - lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { } + lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET, 0) { } CDentry(const nstring& n, inodeno_t ino, unsigned char dt, snapid_t f, snapid_t l) : name(n), @@ -140,7 +140,7 @@ public: version(0), projected_version(0), xlist_dirty(this), auth_pins(0), nested_auth_pins(0), nested_anchors(0), - lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { } + lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET, 0) { } CInode *get_inode() const { return inode; } CDir *get_dir() const { return dir; } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index ce200ed9129cb..a1e42fb3d2721 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -134,8 +134,8 @@ ostream& operator<<(ostream& out, CInode& in) it != in.get_client_caps().end(); it++) { if (it != in.get_client_caps().begin()) out << ","; - out << it->first << "=" << cap_string(it->second->issued()) - << "/" << cap_string(it->second->wanted()); + out << it->first << "=" << ccap_string(it->second->issued()) + << "/" << ccap_string(it->second->wanted()); } out << "}"; if (in.get_loner() >= 0) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 396b382469941..92c7ce1809303 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -294,14 +294,14 @@ private: xlist_purging_inode(this), auth_pins(0), nested_auth_pins(0), nested_anchors(0), - versionlock(this, CEPH_LOCK_IVERSION, WAIT_VERSIONLOCK_OFFSET), - authlock(this, CEPH_LOCK_IAUTH, WAIT_AUTHLOCK_OFFSET), - linklock(this, CEPH_LOCK_ILINK, 
WAIT_LINKLOCK_OFFSET), - dirfragtreelock(this, CEPH_LOCK_IDFT, WAIT_DIRFRAGTREELOCK_OFFSET), - filelock(this, CEPH_LOCK_IFILE, WAIT_FILELOCK_OFFSET), - xattrlock(this, CEPH_LOCK_IXATTR, WAIT_XATTRLOCK_OFFSET), - snaplock(this, CEPH_LOCK_ISNAP, WAIT_SNAPLOCK_OFFSET), - nestlock(this, CEPH_LOCK_INEST, WAIT_NESTLOCK_OFFSET), + versionlock(this, CEPH_LOCK_IVERSION, WAIT_VERSIONLOCK_OFFSET, 0), + authlock(this, CEPH_LOCK_IAUTH, WAIT_AUTHLOCK_OFFSET, CEPH_CAP_SAUTH), + linklock(this, CEPH_LOCK_ILINK, WAIT_LINKLOCK_OFFSET, CEPH_CAP_SLINK), + dirfragtreelock(this, CEPH_LOCK_IDFT, WAIT_DIRFRAGTREELOCK_OFFSET, 0), + filelock(this, CEPH_LOCK_IFILE, WAIT_FILELOCK_OFFSET, CEPH_CAP_SFILE), + xattrlock(this, CEPH_LOCK_IXATTR, WAIT_XATTRLOCK_OFFSET, CEPH_CAP_SXATTR), + snaplock(this, CEPH_LOCK_ISNAP, WAIT_SNAPLOCK_OFFSET, 0), + nestlock(this, CEPH_LOCK_INEST, WAIT_NESTLOCK_OFFSET, 0), loner_cap(-1) { memset(&inode, 0, sizeof(inode)); @@ -487,7 +487,7 @@ public: it != client_caps.end(); it++) if (!it->second->is_stale() && - (it->second->wanted() & (CEPH_CAP_WR|CEPH_CAP_RD))) { + (it->second->wanted() & CEPH_CAP_ANY_WR)) { if (n) return false; n++; @@ -600,8 +600,32 @@ public: } } + // caps allowed + int get_caps_allowed_ever() { + return + (filelock.gcaps_allowed_ever() << filelock.get_cap_shift()) | + (authlock.gcaps_allowed_ever() << authlock.get_cap_shift()) | + (xattrlock.gcaps_allowed_ever() << xattrlock.get_cap_shift()) | + (linklock.gcaps_allowed_ever() << linklock.get_cap_shift()); + } + int get_caps_allowed(bool loner) { + return + (filelock.gcaps_allowed(loner) << filelock.get_cap_shift()) | + (authlock.gcaps_allowed(loner) << authlock.get_cap_shift()) | + (xattrlock.gcaps_allowed(loner) << xattrlock.get_cap_shift()) | + (linklock.gcaps_allowed(loner) << linklock.get_cap_shift()); + } + int get_caps_careful() { + return + (filelock.gcaps_careful() << filelock.get_cap_shift()) | + (authlock.gcaps_careful() << authlock.get_cap_shift()) | + (xattrlock.gcaps_careful() << 
xattrlock.get_cap_shift()) | + (linklock.gcaps_careful() << linklock.get_cap_shift()); + } + // caps issued, wanted - int get_caps_issued(int *ploner = 0, int *pother = 0) { + int get_caps_issued(int *ploner = 0, int *pother = 0, + int shift = 0, int mask = 0xffff) { int c = 0; int loner = 0, other = 0; if (!is_auth()) @@ -616,11 +640,11 @@ public: else other |= i; } - if (ploner) *ploner = loner; - if (pother) *pother = other; + if (ploner) *ploner = (loner >> shift) & mask; + if (pother) *pother = (other >> shift) & mask; return c; } - int get_caps_wanted(int *ploner = 0, int *pother = 0) { + int get_caps_wanted(int *ploner = 0, int *pother = 0, int shift = 0, int mask = 0xffff) { int w = 0; int loner = 0, other = 0; for (map<int,Capability*>::iterator it = client_caps.begin(); @@ -644,8 +668,8 @@ public: other |= it->second; //cout << " get_caps_wanted mds " << it->first << " " << cap_string(it->second) << endl; } - if (ploner) *ploner = loner; - if (pother) *pother = other; + if (ploner) *ploner = (loner >> shift) & mask; + if (pother) *pother = (other >> shift) & mask; return w; } diff --git a/src/mds/Capability.h b/src/mds/Capability.h index 6d198612ea4c2..b9a56ae61da6f 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -136,7 +136,7 @@ public: p++) { c |= p->second; generic_dout(10) << " cap issued: seq " << p->first << " " - << cap_string(p->second) << " -> " << cap_string(c) + << ccap_string(p->second) << " -> " << ccap_string(c) << dendl; } return c; @@ -148,26 +148,6 @@ public: wanted_caps = w; } - // needed - static int needed(int from) { - // strip out wrbuffer, rdcache - return from & (CEPH_CAP_WR|CEPH_CAP_RD); - } - int needed() { return needed(wanted_caps); } - - // conflicts - static int conflicts(int from) { - int c = 0; - if (from & CEPH_CAP_WRBUFFER) c |= CEPH_CAP_RDCACHE|CEPH_CAP_RD; - if (from & CEPH_CAP_WR) c |= CEPH_CAP_RDCACHE; - if (from & CEPH_CAP_RD) c |= CEPH_CAP_WRBUFFER; - if (from & CEPH_CAP_RDCACHE) c |= CEPH_CAP_WRBUFFER|CEPH_CAP_WR; - 
return c; - } - int wanted_conflicts() { return conflicts(wanted()); } - int needed_conflicts() { return conflicts(needed()); } - int issued_conflicts() { return conflicts(issued()); } - // issue caps; return seq number. capseq_t issue(int c) { ++last_sent; @@ -222,7 +202,7 @@ public: // note what we're releasing.. if (p->second & ~caps) { generic_dout(10) << " cap.confirm_receipt revising seq " << seq - << " " << cap_string(cap_history[seq]) << " -> " << cap_string(caps) + << " " << ccap_string(cap_history[seq]) << " -> " << ccap_string(caps) << dendl; r |= cap_history[seq] & ~caps; cap_history[seq] = caps; // confirmed() now less than before.. @@ -231,14 +211,14 @@ public: // null? if (caps == 0 && seq == last_sent) { generic_dout(10) << " cap.confirm_receipt making null seq " << last_recv - << " " << cap_string(cap_history[last_recv]) << dendl; + << " " << ccap_string(cap_history[last_recv]) << dendl; cap_history.clear(); // viola, null! } break; } generic_dout(10) << " cap.confirm_receipt forgetting seq " << p->first - << " " << cap_string(p->second) << dendl; + << " " << ccap_string(p->second) << dendl; r |= p->second; cap_history.erase(p); } diff --git a/src/mds/FileLock.h b/src/mds/FileLock.h index 876d108fe9fe5..cb83c662004cb 100644 --- a/src/mds/FileLock.h +++ b/src/mds/FileLock.h @@ -33,29 +33,29 @@ using namespace std; // // lower-case caps means loner-only. -// -----auth-------- ---replica------- -#define LOCK_SYNC_ 1 // AR R . / C R . . . L R . / C R . . . L stat() -#define LOCK_LONER_SYNC -12 // A . . / C r . . . L * loner -> sync -#define LOCK_MIXED_SYNC -13 // AR . w / . R . . . L . w / . R . . . L +// -----auth---------- ---replica------- +#define LOCK_SYNC_ 1 // AR R . / C . R . . . L R . / C R . . . L stat() +#define LOCK_LONER_SYNC -12 // A . . / C . r . . . L * loner -> sync +#define LOCK_MIXED_SYNC -13 // AR . w / . . R . . . L . w / . R . . . L #define LOCK_MIXED_SYNC2 -14 // R . . / . R . . . 
L replica already acked -#define LOCK_LOCK_SYNC_ // A . w / C . . . b L +#define LOCK_LOCK_SYNC_ // A . w / C . . . . b L -#define LOCK_LOCK_ 2 // AR R W / C . . . B . . . / C . . . . . truncate() -#define LOCK_SYNC_LOCK_ -3 // AR R . / C . . . . . r . / C . . . . . -#define LOCK_LONER_LOCK -4 // A . . / C . . . B . loner -> lock -#define LOCK_MIXED_LOCK -5 // AR . w / . . . . . . . w / . . . . . . +#define LOCK_LOCK_ 2 // AR R W / C . . . . B . . . / C . . . . . truncate() +#define LOCK_SYNC_LOCK_ -3 // AR R . / C . . . . . . r . / C . . . . . +#define LOCK_LONER_LOCK -4 // A . . / C . . . . B . loner -> lock +#define LOCK_MIXED_LOCK -5 // AR . w / . . . . . . . . w / . . . . . . -#define LOCK_MIXED 6 // AR . W / . R W A . L . W / . R . . . L -#define LOCK_SYNC_MIXED -7 // AR r . / . R . . . L r . / . R . . . L -#define LOCK_LONER_MIXED -8 // A . . / . r w a . L * loner -> mixed +#define LOCK_MIXED 6 // AR . W / . . R W A . L . W / . R . . . L +#define LOCK_SYNC_MIXED -7 // AR r . / . . R . . . L r . / . R . . . L +#define LOCK_LONER_MIXED -8 // A . . / . . r w a . L * loner -> mixed -#define LOCK_LONER 9 // A . . / c r w a b L * (lock) -#define LOCK_SYNC_LONER -10 // A r . / . R . . . L -#define LOCK_MIXED_LONER -11 // A . w / . R W A . L -#define LOCK_LOCK_LONER -16 // A . . / c . . . b . * +#define LOCK_LONER 9 // A . . / c x r w a b L * (lock) +#define LOCK_SYNC_LONER -10 // A r . / c . R . . . L * +#define LOCK_MIXED_LONER -11 // A . w / . . R W A . L +#define LOCK_LOCK_LONER -16 // A . . / c . . . . b . * + +// * <- loner_mode: caps vary if client is loner vs non-loner. -// * <- varies if client is loner vs non-loner. 
- /* no append scenarios: @@ -82,8 +82,8 @@ class Mutation; class FileLock : public ScatterLock { public: - FileLock(MDSCacheObject *o, int t, int wo) : - ScatterLock(o, t, wo) {} + FileLock(MDSCacheObject *o, int t, int ws, int cs) : + ScatterLock(o, t, ws, cs) {} const char *get_state_name(int n) { switch (n) { @@ -183,82 +183,93 @@ class FileLock : public ScatterLock { } - // client caps allowed - int caps_allowed_ever() { + // caps + + // true if we are in a "loner" mode that distinguishes between a loner and everyone else + bool is_loner_mode() { + return (state == LOCK_LONER_SYNC || + state == LOCK_LONER_MIXED || + state == LOCK_LONER || + state == LOCK_SYNC_LONER || + state == LOCK_LOCK_LONER); + } + int gcaps_allowed_ever() { if (parent->is_auth()) - return CEPH_CAP_PIN | - CEPH_CAP_RDCACHE | CEPH_CAP_RD | - CEPH_CAP_WR | CEPH_CAP_WREXTEND | CEPH_CAP_WRBUFFER | CEPH_CAP_EXCL | - CEPH_CAP_LAZYIO; + return + CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL | + CEPH_CAP_GRD | CEPH_CAP_GWR | + CEPH_CAP_GWREXTEND | + CEPH_CAP_GWRBUFFER | + CEPH_CAP_GLAZYIO; else - return CEPH_CAP_PIN | - CEPH_CAP_RDCACHE | CEPH_CAP_RD | CEPH_CAP_LAZYIO; + return + CEPH_CAP_GRDCACHE | CEPH_CAP_GRD | CEPH_CAP_GLAZYIO; } - int caps_allowed(bool loner) { + int gcaps_allowed(bool loner) { + if (loner && !is_loner_mode()) + loner = false; if (parent->is_auth()) switch (state) { case LOCK_SYNC: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_RD | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRDCACHE | CEPH_CAP_GRD | CEPH_CAP_GLAZYIO; case LOCK_SYNC_LOCK: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE; + return CEPH_CAP_GRDCACHE; case LOCK_LOCK: case LOCK_LONER_LOCK: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_WRBUFFER; + return CEPH_CAP_GRDCACHE | CEPH_CAP_GWRBUFFER; case LOCK_LOCK_SYNC: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRDCACHE | CEPH_CAP_GLAZYIO; case LOCK_MIXED_LOCK: - return CEPH_CAP_PIN; + return 0; case LOCK_MIXED: - return CEPH_CAP_PIN | CEPH_CAP_RD | 
CEPH_CAP_WR | CEPH_CAP_WREXTEND | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRD | CEPH_CAP_GWR | CEPH_CAP_GWREXTEND | CEPH_CAP_GLAZYIO; case LOCK_SYNC_MIXED: - return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRD | CEPH_CAP_GLAZYIO; case LOCK_LONER_MIXED: - return CEPH_CAP_PIN | (loner ? (CEPH_CAP_RD | CEPH_CAP_WR | CEPH_CAP_WREXTEND) : 0); + return (loner ? (CEPH_CAP_GRD | CEPH_CAP_GWR | CEPH_CAP_GWREXTEND) : 0); - case LOCK_LONER: // single client writer, of course. - return CEPH_CAP_PIN | CEPH_CAP_LAZYIO | - ( loner ? (CEPH_CAP_RDCACHE | CEPH_CAP_RD | CEPH_CAP_WR | CEPH_CAP_WREXTEND | CEPH_CAP_WRBUFFER | CEPH_CAP_EXCL) : 0 ); + case LOCK_LONER: + return CEPH_CAP_GLAZYIO | + ( loner ? (CEPH_CAP_GRDCACHE | CEPH_CAP_GRD | CEPH_CAP_GWR | CEPH_CAP_GWREXTEND | CEPH_CAP_GWRBUFFER | CEPH_CAP_GEXCL) : 0 ); case LOCK_SYNC_LONER: - return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRD | CEPH_CAP_GLAZYIO | (loner ? CEPH_CAP_GRDCACHE : 0); case LOCK_MIXED_LONER: - return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_WR | CEPH_CAP_WREXTEND | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRD | CEPH_CAP_GWR | CEPH_CAP_GWREXTEND | CEPH_CAP_GLAZYIO; case LOCK_LOCK_LONER: - return CEPH_CAP_PIN | (loner ? (CEPH_CAP_RDCACHE | CEPH_CAP_WRBUFFER) : 0); + return (loner ? (CEPH_CAP_GRDCACHE | CEPH_CAP_GWRBUFFER) : 0); case LOCK_LONER_SYNC: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | (loner ? CEPH_CAP_RD:0) | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRDCACHE | (loner ? 
CEPH_CAP_GRD:0) | CEPH_CAP_GLAZYIO; case LOCK_MIXED_SYNC: - return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRD | CEPH_CAP_GLAZYIO; } else switch (state) { case LOCK_SYNC: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_RD | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRDCACHE | CEPH_CAP_GRD | CEPH_CAP_GLAZYIO; case LOCK_LOCK: case LOCK_SYNC_LOCK: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE; + return CEPH_CAP_GRDCACHE; case LOCK_SYNC_MIXED: case LOCK_MIXED: - return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRD | CEPH_CAP_GLAZYIO; case LOCK_MIXED_SYNC: case LOCK_MIXED_SYNC2: - return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_LAZYIO; + return CEPH_CAP_GRDCACHE | CEPH_CAP_GLAZYIO; } assert(0); return 0; } - - // true if we are in a "loner" mode that distinguishes between a loner and everyone else - bool is_loner_mode() { - return (state == LOCK_LONER_SYNC || - state == LOCK_LONER_MIXED || - state == LOCK_LONER || - state == LOCK_LOCK_LONER); + int gcaps_careful() { + if (num_wrlock) + return CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL | CEPH_CAP_GWRBUFFER; + return 0; } + }; diff --git a/src/mds/LocalLock.h b/src/mds/LocalLock.h index dfbaf2e8a53a8..2ec7f72307b72 100644 --- a/src/mds/LocalLock.h +++ b/src/mds/LocalLock.h @@ -23,8 +23,8 @@ protected: int num_wrlock; public: - LocalLock(MDSCacheObject *o, int t, int wo) : - SimpleLock(o, t, wo), + LocalLock(MDSCacheObject *o, int t, int ws, int cs ) : + SimpleLock(o, t, ws, cs), num_wrlock(0) { set_state(LOCK_LOCK); // always. } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 8ad4f212c187c..b7968f743bef2 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -543,8 +543,6 @@ Capability* Locker::issue_new_caps(CInode *in, } } - int before = cap->pending(); - if (in->is_auth()) { // [auth] twiddle mode? if (in->filelock.is_stable()) @@ -561,14 +559,6 @@ Capability* Locker::issue_new_caps(CInode *in, cap->issue(cap->pending()); cap->set_last_open(); - // twiddle file_data_version? 
- int now = cap->pending(); - if ((before & CEPH_CAP_WRBUFFER) == 0 && - (now & CEPH_CAP_WRBUFFER)) { - in->inode.file_data_version++; - dout(7) << " incrementing file_data_version, now " << in->inode.file_data_version << " for " << *in << dendl; - } - return cap; } @@ -578,41 +568,31 @@ Capability* Locker::issue_new_caps(CInode *in, bool Locker::issue_caps(CInode *in) { // allowed caps are determined by the lock mode. - int all_allowed = in->filelock.caps_allowed(false); - - // loner mode? if so, we restict allows caps to a single loner client - bool loner_mode = in->filelock.is_loner_mode(); - int loner_allowed; - if (loner_mode) - loner_allowed = in->filelock.caps_allowed(true); - else - loner_allowed = all_allowed; - - int loner = -1; - if (loner_mode) { - loner = in->get_loner(); - dout(7) << "issue_caps filelock loner client" << loner - << " allowed=" << cap_string(loner_allowed) - << ", others allowed=" << cap_string(all_allowed) + int all_allowed = in->get_caps_allowed(false); + int loner_allowed = in->get_caps_allowed(true); + int careful = in->get_caps_careful(); + + int loner = in->get_loner(); + if (loner >= 0) { + dout(7) << "issue_caps loner client" << loner + << " allowed=" << ccap_string(loner_allowed) + << ", others allowed=" << ccap_string(all_allowed) << " on " << *in << dendl; } else { - dout(7) << "issue_caps filelock allowed=" << cap_string(all_allowed) + dout(7) << "issue_caps allowed=" << ccap_string(all_allowed) << " on " << *in << dendl; } + + if (careful) + dout(7) << "issue_caps careful " << ccap_string(careful) << dendl; // count conflicts with int nissued = 0; - bool sizemtime_is_projected = false; - if (&in->inode != in->get_projected_inode() && - (in->inode.size != in->get_projected_inode()->size || - in->inode.mtime != in->get_projected_inode()->mtime)) { - dout(10) << " new size|mtime is projected" << dendl; - sizemtime_is_projected = true; - } - // should we increase max_size? 
- if (!in->is_dir() && ((all_allowed|loner_allowed) & CEPH_CAP_WR) && in->is_auth()) + if (!in->is_dir() && + ((all_allowed|loner_allowed) & (CEPH_CAP_GWR<<CEPH_CAP_SFILE)) && + in->is_auth()) check_inode_max_size(in); // client caps @@ -625,20 +605,18 @@ bool Locker::issue_caps(CInode *in) // do not issue _new_ bits when size|mtime is projected int allowed; - if (loner_mode && loner == it->first) + if (loner == it->first) allowed = loner_allowed; else allowed = all_allowed; - int careful = CEPH_CAP_EXCL|CEPH_CAP_WRBUFFER|CEPH_CAP_RDCACHE; int pending = cap->pending(); - if (sizemtime_is_projected) - allowed &= ~careful | pending; // only allow "careful" bits if already issued + allowed &= ~careful | pending; // only allow "careful" bits if already issued dout(20) << " client" << it->first - << " pending " << cap_string(pending) - << " allowed " << cap_string(allowed) - << " wanted " << cap_string(cap->wanted()) + << " pending " << ccap_string(pending) + << " allowed " << ccap_string(allowed) + << " wanted " << ccap_string(cap->wanted()) << dendl; if (cap->pending() != (cap->wanted() & allowed)) { @@ -647,28 +625,20 @@ bool Locker::issue_caps(CInode *in) int before = cap->pending(); long seq = cap->issue(cap->wanted() & allowed); - int after = cap->pending(); - - // twiddle file_data_version? 
- if (!(before & CEPH_CAP_WRBUFFER) && - (after & CEPH_CAP_WRBUFFER)) { - dout(7) << " incrementing file_data_version for " << *in << dendl; - in->inode.file_data_version++; - } if (seq > 0 && !cap->is_suppress()) { dout(7) << " sending MClientCaps to client" << it->first << " seq " << cap->get_last_seq() - << " new pending " << cap_string(cap->pending()) << " was " << cap_string(before) + << " new pending " << ccap_string(cap->pending()) << " was " << ccap_string(before) << dendl; mds->send_message_client(new MClientCaps(CEPH_CAP_OP_GRANT, - in->inode, - in->find_snaprealm()->inode->ino(), - cap->get_last_seq(), - cap->pending(), - cap->wanted(), - cap->get_mseq()), + in->inode, + in->find_snaprealm()->inode->ino(), + cap->get_last_seq(), + cap->pending(), + cap->wanted(), + cap->get_mseq()), it->first); } } @@ -686,12 +656,12 @@ void Locker::issue_truncate(CInode *in) it++) { Capability *cap = it->second; mds->send_message_client(new MClientCaps(CEPH_CAP_OP_TRUNC, - in->inode, - in->find_snaprealm()->inode->ino(), - cap->get_last_seq(), - cap->pending(), - cap->wanted(), - cap->get_mseq()), + in->inode, + in->find_snaprealm()->inode->ino(), + cap->get_last_seq(), + cap->pending(), + cap->wanted(), + cap->get_mseq()), it->first); } @@ -710,15 +680,20 @@ void Locker::revoke_stale_caps(Session *session) CInode *in = cap->get_inode(); int issued = cap->issued(); if (issued) { - dout(10) << " revoking " << cap_string(issued) << " on " << *in << dendl; + dout(10) << " revoking " << ccap_string(issued) << " on " << *in << dendl; cap->revoke(); + if (in->inode.max_size > in->inode.size) in->state_set(CInode::STATE_NEEDSRECOVER); - if (!in->filelock.is_stable()) - file_eval_gather(&in->filelock); + + if (!in->filelock.is_stable()) file_eval_gather(&in->filelock); + if (!in->authlock.is_stable()) simple_eval_gather(&in->authlock); + if (!in->xattrlock.is_stable()) simple_eval_gather(&in->xattrlock); + if (in->is_auth()) { - if (in->filelock.is_stable()) - 
file_eval(&in->filelock); + if (in->filelock.is_stable()) file_eval(&in->filelock); + if (in->authlock.is_stable()) simple_eval(&in->authlock); + if (in->xattrlock.is_stable()) simple_eval(&in->xattrlock); } else { request_inode_file_caps(in); } @@ -783,8 +758,8 @@ void Locker::request_inode_file_caps(CInode *in) if (in->replica_caps_wanted_keep_until > g_clock.recent_now()) { // ok, release them finally! in->replica_caps_wanted_keep_until.sec_ref() = 0; - dout(7) << "request_inode_file_caps " << cap_string(wanted) - << " was " << cap_string(in->replica_caps_wanted) + dout(7) << "request_inode_file_caps " << ccap_string(wanted) + << " was " << ccap_string(in->replica_caps_wanted) << " no keeping anymore " << " on " << *in << dendl; @@ -793,8 +768,8 @@ void Locker::request_inode_file_caps(CInode *in) in->replica_caps_wanted_keep_until = g_clock.recent_now(); in->replica_caps_wanted_keep_until.sec_ref() += 2; - dout(7) << "request_inode_file_caps " << cap_string(wanted) - << " was " << cap_string(in->replica_caps_wanted) + dout(7) << "request_inode_file_caps " << ccap_string(wanted) + << " was " << ccap_string(in->replica_caps_wanted) << " keeping until " << in->replica_caps_wanted_keep_until << " on " << *in << dendl; @@ -816,8 +791,8 @@ void Locker::request_inode_file_caps(CInode *in) } int auth = in->authority().first; - dout(7) << "request_inode_file_caps " << cap_string(wanted) - << " was " << cap_string(in->replica_caps_wanted) + dout(7) << "request_inode_file_caps " << ccap_string(wanted) + << " was " << ccap_string(in->replica_caps_wanted) << " on " << *in << " to mds" << auth << dendl; assert(!in->is_auth()); @@ -850,7 +825,7 @@ void Locker::handle_inode_file_caps(MInodeFileCaps *m) } - dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << cap_string(m->get_caps()) << " on " << *in << dendl; + dout(7) << "handle_inode_file_caps replica mds" << m->get_from() << " wants caps " << ccap_string(m->get_caps()) << " on " << *in << 
dendl; if (m->get_caps()) in->mds_caps_wanted[m->get_from()] = m->get_caps(); @@ -888,7 +863,7 @@ bool Locker::check_inode_max_size(CInode *in, bool forceupdate, __u64 new_size) if (forceupdate) size = new_size; - if ((in->get_caps_wanted() & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0) + if ((in->get_caps_wanted() & ((CEPH_CAP_GWR|CEPH_CAP_GWRBUFFER) << CEPH_CAP_SFILE)) == 0) new_max = 0; else if ((size << 1) >= latest->max_size) new_max = latest->max_size ? (latest->max_size << 1):in->get_layout_size_increment(); @@ -969,7 +944,7 @@ void Locker::share_inode_max_size(CInode *in) it++) { const int client = it->first; Capability *cap = it->second; - if (cap->pending() & CEPH_CAP_WR) { + if (cap->pending() & (CEPH_CAP_GWR<<CEPH_CAP_SFILE)) { dout(10) << "share_inode_max_size with client" << client << dendl; mds->send_message_client(new MClientCaps(CEPH_CAP_OP_GRANT, in->inode, @@ -1062,12 +1037,12 @@ void Locker::handle_client_caps(MClientCaps *m) // for this and all subsequent versions of this inode, while (1) { // filter wanted based on what we could ever give out (given auth/replica status) - int wanted = m->get_wanted() & head_in->filelock.caps_allowed_ever(); + int wanted = m->get_wanted() & head_in->get_caps_allowed_ever(); int had = cap->confirm_receipt(m->get_seq(), m->get_caps()); int has = cap->confirmed(); dout(10) << " follows " << follows - << ", had " << cap_string(had) - << ", has " << cap_string(has) + << ", had " << ccap_string(had) + << ", has " << ccap_string(has) << " on " << *in << dendl; MClientCaps *ack = 0; @@ -1084,7 +1059,7 @@ void Locker::handle_client_caps(MClientCaps *m) * we use last_sent here, not last_open, just to keep the client * logic for deciding when to reply to a revocation simple. 
*/ - dout(10) << " ignoring release|wanted " << cap_string(m->get_wanted()) + dout(10) << " ignoring release|wanted " << ccap_string(m->get_wanted()) << " bc seq " << m->get_seq() << " < last sent " << cap->get_last_sent() << dendl; } else if (m->get_op() == CEPH_CAP_OP_RELEASE) { dout(7) << " release request client" << client << " seq " << m->get_seq() << " on " << *in << dendl; @@ -1097,8 +1072,8 @@ void Locker::handle_client_caps(MClientCaps *m) cap->releasing++; ack = new MClientCaps(CEPH_CAP_OP_RELEASED, in->inode, 0, 0, 0, 0, 0); } else if (wanted != cap->wanted()) { - dout(10) << " wanted " << cap_string(cap->wanted()) - << " -> " << cap_string(wanted) << dendl; + dout(10) << " wanted " << ccap_string(cap->wanted()) + << " -> " << ccap_string(wanted) << dendl; cap->set_wanted(wanted); } @@ -1157,7 +1132,7 @@ void Locker::_finish_release_cap(CInode *in, int client, capseq_t seq, MClientCa void Locker::_do_cap_update(CInode *in, int had, int all_wanted, snapid_t follows, MClientCaps *m, MClientCaps *ack, capseq_t releasecap) { - dout(10) << "_do_cap_update had " << cap_string(had) << " on " << *in << dendl; + dout(10) << "_do_cap_update had " << ccap_string(had) << " on " << *in << dendl; int client = m->get_source().num(); @@ -1169,8 +1144,8 @@ void Locker::_do_cap_update(CInode *in, int had, int all_wanted, snapid_t follow uint64_t size = m->get_size(); // atime|mtime|size? 
- bool had_or_has_wr = had & CEPH_CAP_WR; - bool excl = had & CEPH_CAP_EXCL; + bool had_or_has_wr = had & (CEPH_CAP_GWR << CEPH_CAP_SFILE); + bool excl = had & CEPH_CAP_ANY_EXCL; bool dirty_atime = false; bool dirty_mtime = false; bool dirty_ctime = false; @@ -1192,18 +1167,18 @@ void Locker::_do_cap_update(CInode *in, int had, int all_wanted, snapid_t follow uint64_t new_max = latest->max_size; if (in->is_auth()) { - if (latest->max_size && (all_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0) { + if (latest->max_size && (all_wanted & CEPH_CAP_ANY_FILE_WR) == 0) { change_max = true; new_max = 0; } - else if ((all_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_WREXTEND)) && + else if ((all_wanted & CEPH_CAP_ANY_FILE_WR) && (size << 1) >= latest->max_size) { dout(10) << "wr caps wanted, and size " << size << " *2 >= max " << latest->max_size << ", increasing" << dendl; change_max = true; new_max = latest->max_size ? (latest->max_size << 1):in->get_layout_size_increment(); } - if ((all_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_WREXTEND)) && + if ((all_wanted & CEPH_CAP_ANY_FILE_WR) && m->get_max_size() > new_max) { dout(10) << "client requests file_max " << m->get_max_size() << " > max " << latest->max_size << dendl; @@ -1391,7 +1366,7 @@ int Locker::issue_client_lease(CDentry *dn, int client, if (!diri->is_stray() && // do not issue dn leases in stray dir! (diri->is_base() || // base inode's don't get version updated, so ICONTENT is useless. 
(!diri->filelock.can_lease() && - (diri->get_client_cap_pending(client) & (CEPH_CAP_EXCL|CEPH_CAP_RDCACHE)) == 0)) && + (diri->get_client_cap_pending(client) & ((CEPH_CAP_GEXCL|CEPH_CAP_GRDCACHE) << CEPH_CAP_SFILE)) == 0)) && dn->lock.can_lease(client)) mask |= CEPH_LOCK_DN; @@ -3347,15 +3322,16 @@ void Locker::file_eval_gather(FileLock *lock) { CInode *in = (CInode*)lock->get_parent(); - int loner_allowed = lock->caps_allowed(true); - int other_allowed = lock->caps_allowed(false); + // allowed _just_ by this lock + int loner_allowed = lock->gcaps_allowed(true); + int other_allowed = lock->gcaps_allowed(false); int loner_issued, other_issued; - in->get_caps_issued(&loner_issued, &other_issued); + in->get_caps_issued(&loner_issued, &other_issued, CEPH_CAP_SFILE); dout(7) << "file_eval_gather issued " - << cap_string(loner_issued) << "/" << cap_string(other_issued) << " vs " - << cap_string(loner_allowed) << "/" << cap_string(other_allowed) + << gcap_string(loner_issued) << "/" << gcap_string(other_issued) << " vs " + << gcap_string(loner_allowed) << "/" << gcap_string(other_allowed) << " on " << *lock << " on " << *lock->get_parent() << dendl; if (lock->is_stable()) @@ -3526,8 +3502,8 @@ void Locker::file_eval(FileLock *lock) { CInode *in = (CInode*)lock->get_parent(); int loner_wanted, other_wanted; - int wanted = in->get_caps_wanted(&loner_wanted, &other_wanted); - dout(7) << "file_eval wanted=" << cap_string(wanted) + int wanted = in->get_caps_wanted(&loner_wanted, &other_wanted, CEPH_CAP_SFILE); + dout(7) << "file_eval wanted=" << gcap_string(wanted) << " filelock=" << *lock << " on " << *lock->get_parent() << " loner " << in->get_loner() << dendl; @@ -3545,10 +3521,10 @@ void Locker::file_eval(FileLock *lock) in->get_caps_issued(&loner_issued, &other_issued); if (in->get_loner() >= 0) { - if ((loner_wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER|CEPH_CAP_RD)) == 0 || - (other_wanted & (CEPH_CAP_WR|CEPH_CAP_RD))) { + if ((loner_wanted & 
(CEPH_CAP_GWR|CEPH_CAP_GWRBUFFER|CEPH_CAP_GRD)) == 0 || + (other_wanted & (CEPH_CAP_GWR|CEPH_CAP_GRD))) { // we should lose it. - if ((other_wanted & CEPH_CAP_WR) || + if ((other_wanted & CEPH_CAP_GWR) || lock->is_waiter_for(SimpleLock::WAIT_WR) || lock->is_wrlocked()) file_mixed(lock); @@ -3562,7 +3538,7 @@ void Locker::file_eval(FileLock *lock) else if (lock->get_state() != LOCK_LONER && !lock->is_rdlocked() && !lock->is_waiter_for(SimpleLock::WAIT_WR) && - (wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) && + (wanted & (CEPH_CAP_GWR|CEPH_CAP_GWRBUFFER)) && in->choose_loner()) { dout(7) << "file_eval stable, bump to loner " << *lock << " on " << *lock->get_parent() << dendl; @@ -3573,8 +3549,8 @@ void Locker::file_eval(FileLock *lock) else if (lock->get_state() != LOCK_MIXED && !lock->is_rdlocked() && !lock->is_waiter_for(SimpleLock::WAIT_WR) && - (wanted & CEPH_CAP_RD) && - (wanted & CEPH_CAP_WR)) { + (wanted & CEPH_CAP_GRD) && + (wanted & CEPH_CAP_GWR)) { dout(7) << "file_eval stable, bump to mixed " << *lock << " on " << *lock->get_parent() << dendl; file_mixed(lock); @@ -3583,7 +3559,7 @@ void Locker::file_eval(FileLock *lock) // * -> sync? 
else if (lock->get_state() != LOCK_SYNC && !in->filelock.is_waiter_for(SimpleLock::WAIT_WR) && - !(wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) && + !(wanted & (CEPH_CAP_GWR|CEPH_CAP_GWRBUFFER)) && !(in->get_state() == LOCK_MIXED && in->is_dir() && in->has_subtree_root_dirfrag()) // if we are a delegation point, stay where we are //((wanted & CEPH_CAP_RD) || @@ -3621,9 +3597,9 @@ bool Locker::file_sync(FileLock *lock) int gather = 0; int loner_issued, other_issued; - in->get_caps_issued(&loner_issued, &other_issued); - if ((loner_issued & ~lock->caps_allowed(true)) || - (other_issued & ~lock->caps_allowed(false))) { + in->get_caps_issued(&loner_issued, &other_issued, CEPH_CAP_SFILE); + if ((loner_issued & ~lock->gcaps_allowed(true)) || + (other_issued & ~lock->gcaps_allowed(false))) { issue_caps(in); gather++; } @@ -3689,9 +3665,9 @@ void Locker::file_lock(FileLock *lock) gather++; } int loner_issued, other_issued; - in->get_caps_issued(&loner_issued, &other_issued); - if ((loner_issued & ~lock->caps_allowed(true)) || - (other_issued & ~lock->caps_allowed(false))) { + in->get_caps_issued(&loner_issued, &other_issued, CEPH_CAP_SFILE); + if ((loner_issued & ~lock->gcaps_allowed(true)) || + (other_issued & ~lock->gcaps_allowed(false))) { issue_caps(in); gather++; } @@ -3757,9 +3733,9 @@ void Locker::file_mixed(FileLock *lock) gather++; } int loner_issued, other_issued; - in->get_caps_issued(&loner_issued, &other_issued); - if ((loner_issued & ~lock->caps_allowed(true)) || - (other_issued & ~lock->caps_allowed(false))) { + in->get_caps_issued(&loner_issued, &other_issued, CEPH_CAP_SFILE); + if ((loner_issued & ~lock->gcaps_allowed(true)) || + (other_issued & ~lock->gcaps_allowed(false))) { issue_caps(in); gather++; } @@ -3809,10 +3785,10 @@ void Locker::file_loner(FileLock *lock) gather++; } int loner_issued, other_issued; - in->get_caps_issued(&loner_issued, &other_issued); - dout(10) << " issued loner " << cap_string(loner_issued) << " other " << 
cap_string(other_issued) << dendl; - if ((loner_issued & ~lock->caps_allowed(true)) || - (other_issued & ~lock->caps_allowed(false))) { + in->get_caps_issued(&loner_issued, &other_issued, CEPH_CAP_SFILE); + dout(10) << " issued loner " << gcap_string(loner_issued) << " other " << gcap_string(other_issued) << dendl; + if ((loner_issued & ~lock->gcaps_allowed(true)) || + (other_issued & ~lock->gcaps_allowed(false))) { issue_caps(in); gather++; } @@ -3893,9 +3869,9 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m) // call back caps? int loner_issued, other_issued; - in->get_caps_issued(&loner_issued, &other_issued); - if ((loner_issued & ~lock->caps_allowed(true)) || - (other_issued & ~lock->caps_allowed(false))) { + in->get_caps_issued(&loner_issued, &other_issued, CEPH_CAP_SFILE); + if ((loner_issued & ~lock->gcaps_allowed(true)) || + (other_issued & ~lock->gcaps_allowed(false))) { dout(7) << "handle_file_lock client readers, gathering caps on " << *in << dendl; issue_caps(in); break; @@ -3924,9 +3900,9 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m) // MIXED lock->set_state(LOCK_SYNC_MIXED); int loner_issued, other_issued; - in->get_caps_issued(&loner_issued, &other_issued); - if ((loner_issued & ~lock->caps_allowed(true)) || - (other_issued & ~lock->caps_allowed(false))) { + in->get_caps_issued(&loner_issued, &other_issued, CEPH_CAP_SFILE); + if ((loner_issued & ~lock->gcaps_allowed(true)) || + (other_issued & ~lock->gcaps_allowed(false))) { // call back client caps issue_caps(in); break; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index bba97882c8ca4..da09849566dd4 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1004,7 +1004,7 @@ CInode *MDCache::cow_inode(CInode *in, snapid_t last) p != in->client_caps.end(); p++) { Capability *cap = p->second; - if ((cap->issued() & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) && + if ((cap->issued() & CEPH_CAP_ANY_WR) && cap->client_follows < last) { // clone to oldin int client = p->first; @@ 
-3085,7 +3085,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong) // caps_wanted if (is.caps_wanted) { in->mds_caps_wanted[from] = is.caps_wanted; - dout(15) << " inode caps_wanted " << cap_string(is.caps_wanted) + dout(15) << " inode caps_wanted " << ccap_string(is.caps_wanted) << " on " << *in << dendl; } @@ -3486,9 +3486,9 @@ void MDCache::process_reconnected_caps() int issued = in->get_caps_issued(); if (in->is_auth()) { // wr? - if (issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) { + if (issued & CEPH_CAP_ANY_WR) { in->loner_cap = -1; - if (issued & (CEPH_CAP_RDCACHE|CEPH_CAP_WRBUFFER)) { + if (issued & ((CEPH_CAP_GRDCACHE|CEPH_CAP_GWRBUFFER) << CEPH_CAP_SFILE)) { in->filelock.set_state(LOCK_LONER); in->choose_loner(); } else { @@ -3497,14 +3497,14 @@ void MDCache::process_reconnected_caps() } } else { // note that client should perform stale/reap cleanup during reconnect. - assert((issued & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0); // ???? + assert((issued & CEPH_CAP_ANY_WR) == 0); // ???? 
in->loner_cap = -1; if (in->filelock.is_xlocked()) in->filelock.set_state(LOCK_LOCK); else in->filelock.set_state(LOCK_SYNC); // might have been lock, previously } - dout(15) << " issued " << cap_string(issued) + dout(15) << " issued " << ccap_string(issued) << " chose " << in->filelock << " on " << *in << dendl; diff --git a/src/mds/ScatterLock.h b/src/mds/ScatterLock.h index 2490c3be20af8..a3948ca6463a7 100644 --- a/src/mds/ScatterLock.h +++ b/src/mds/ScatterLock.h @@ -49,8 +49,8 @@ public: xlist::item xlistitem_updated; utime_t update_stamp; - ScatterLock(MDSCacheObject *o, int t, int wo) : - SimpleLock(o, t, wo), + ScatterLock(MDSCacheObject *o, int t, int ws, int cs) : + SimpleLock(o, t, ws, cs), updated(false), xlistitem_updated(this) {} ~ScatterLock() { diff --git a/src/mds/Server.cc b/src/mds/Server.cc index f27e89270b04d..f04967206390b 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -229,7 +229,7 @@ void Server::_session_logged(Session *session, bool open, version_t pv, dequecaps.empty()) { Capability *cap = session->caps.front(); CInode *in = cap->get_inode(); - dout(20) << " killing capability " << cap_string(cap->issued()) << " on " << *in << dendl; + dout(20) << " killing capability " << ccap_string(cap->issued()) << " on " << *in << dendl; in->remove_client_cap(session->inst.name.num()); mds->locker->try_file_eval(&in->filelock); } @@ -4883,13 +4883,13 @@ void Server::_do_open(MDRequest *mdr, CInode *cur) if (is_new) cap->dec_suppress(); // stop suppressing messages on new cap - dout(12) << "_do_open issued caps " << cap_string(cap->pending()) + dout(12) << "_do_open issued caps " << ccap_string(cap->pending()) << " for " << req->get_orig_source() << " on " << *cur << dendl; mdr->cap = cap; } else { int caps = ceph_caps_for_mode(cmode); - dout(12) << "_do_open issued IMMUTABLE SNAP caps " << cap_string(caps) + dout(12) << "_do_open issued IMMUTABLE SNAP caps " << ccap_string(caps) << " for " << req->get_orig_source() << " snapid " << 
mdr->ref_snapid << " on " << *cur << dendl; diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h index 1ac9c6e413ffd..417e2fab1ddde 100644 --- a/src/mds/SimpleLock.h +++ b/src/mds/SimpleLock.h @@ -38,13 +38,21 @@ inline const char *get_lock_type_name(int t) { // -- lock states -- // sync <-> lock #define LOCK_UNDEF 0 -// auth rep -#define LOCK_SYNC 1 // AR R . R . -#define LOCK_LOCK 2 // AR R W . . -#define LOCK_SYNC_LOCK -3 // AR R . . . -#define LOCK_LOCK_SYNC -51 // A R w +// auth rep +#define LOCK_SYNC 1 // AR R . / C . R . / C . +#define LOCK_LOCK 2 // AR R W / . . . . / . . +#define LOCK_SYNC_LOCK -3 // AR R . / . . . . / . . +#define LOCK_LOCK_SYNC -51 // A R w / . . (lock) + +#define LOCK_EXCL -60 // A . . / c x * (lock) +#define LOCK_EXCL_SYNC -61 // A . . / c . * (lock) +#define LOCK_EXCL_LOCK -62 // A . . / . . (lock) +#define LOCK_SYNC_EXCL -63 // Ar r . / c . * (sync->lock) +#define LOCK_LOCK_EXCL -64 // A r w / . . (lock) + #define LOCK_REMOTEXLOCK -50 // on NON-auth +// * = loner mode /* @@ -90,7 +98,8 @@ protected: // parent (what i lock) MDSCacheObject *parent; int type; - int wait_offset; + int wait_shift; + int cap_shift; // lock state __s32 state; @@ -104,8 +113,8 @@ protected: public: - SimpleLock(MDSCacheObject *o, int t, int wo) : - parent(o), type(t), wait_offset(wo), + SimpleLock(MDSCacheObject *o, int t, int ws, int cs) : + parent(o), type(t), wait_shift(ws), cap_shift(cs), state(LOCK_SYNC), num_client_lease(0), num_rdlock(0), num_wrlock(0), xlock_by(0), xlock_by_client(-1) { } virtual ~SimpleLock() {} @@ -114,6 +123,8 @@ public: MDSCacheObject *get_parent() { return parent; } int get_type() { return type; } + int get_cap_shift() { return cap_shift; } + struct ptr_lt { bool operator()(const SimpleLock* l, const SimpleLock* r) const { // first sort by object type (dn < inode) @@ -137,16 +148,16 @@ public: parent->encode_lock_state(type, bl); } void finish_waiters(__u64 mask, int r=0) { - parent->finish_waiting(mask << wait_offset, r); + 
parent->finish_waiting(mask << wait_shift, r); } void take_waiting(__u64 mask, list& ls) { - parent->take_waiting(mask << wait_offset, ls); + parent->take_waiting(mask << wait_shift, ls); } void add_waiter(__u64 mask, Context *c) { - parent->add_waiter(mask << wait_offset, c); + parent->add_waiter(mask << wait_shift, c); } bool is_waiter_for(__u64 mask) { - return parent->is_waiter_for(mask << wait_offset); + return parent->is_waiter_for(mask << wait_shift); } @@ -284,6 +295,50 @@ public: } + // caps + virtual bool is_loner_mode() { + return (state == LOCK_EXCL || + state == LOCK_EXCL_SYNC || + state == LOCK_SYNC_EXCL); + } + virtual int gcaps_allowed_ever() { + if (!cap_shift) + return 0; // none for this lock. + return CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL; + } + virtual int gcaps_allowed(bool loner) { + if (!cap_shift) + return 0; + if (loner && !is_loner_mode()) + loner = false; + if (parent->is_auth()) + switch (state) { + case LOCK_SYNC: return CEPH_CAP_GRDCACHE; + case LOCK_LOCK: return 0; + case LOCK_SYNC_LOCK: return 0; + case LOCK_LOCK_SYNC: return 0; + case LOCK_EXCL: return loner ? (CEPH_CAP_GRDCACHE|CEPH_CAP_GEXCL) : 0; + case LOCK_EXCL_SYNC: return loner ? CEPH_CAP_GRDCACHE:0; + case LOCK_EXCL_LOCK: return 0; + case LOCK_SYNC_EXCL: return loner ? 
CEPH_CAP_GRDCACHE:0; + case LOCK_LOCK_EXCL: return 0; + case LOCK_REMOTEXLOCK: return 0; + } + else + switch (state) { + case LOCK_SYNC: return CEPH_CAP_GRDCACHE; + default: return 0; + } + assert(0); + return 0; + } + virtual int gcaps_careful() { + if (num_wrlock) + return CEPH_CAP_GRDCACHE | CEPH_CAP_GEXCL; + return 0; + } + + // simplelock specifics virtual int get_replica_state() const { switch (state) { diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 5a3a8b4891b13..8b21939a14798 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -55,29 +55,35 @@ using namespace std; typedef __u32 capseq_t; -inline string cap_string(int cap) +inline string gcap_string(int cap) +{ + string s; + if (cap & CEPH_CAP_GRDCACHE) s += "c"; + if (cap & CEPH_CAP_GEXCL) s += "x"; + if (cap & CEPH_CAP_GRD) s += "r"; + if (cap & CEPH_CAP_GWR) s += "w"; + if (cap & CEPH_CAP_GWRBUFFER) s += "b"; + if (cap & CEPH_CAP_GWREXTEND) s += "a"; + if (cap & CEPH_CAP_GLAZYIO) s += "l"; + return s; +} +inline string ccap_string(int cap) { string s; - /* - s = "["; - if (cap & CEPH_CAP_PIN) s += " pin"; - if (cap & CEPH_CAP_RDCACHE) s += " rdcache"; - if (cap & CEPH_CAP_RD) s += " rd"; - if (cap & CEPH_CAP_WR) s += " wr"; - if (cap & CEPH_CAP_WRBUFFER) s += " wrbuffer"; - if (cap & CEPH_CAP_WRBUFFER) s += " wrextend"; - if (cap & CEPH_CAP_LAZYIO) s += " lazyio"; - if (cap & CEPH_CAP_EXCL) s += " excl"; - s += " ]"; - */ if (cap & CEPH_CAP_PIN) s += "p"; - if (cap & CEPH_CAP_RDCACHE) s += "c"; - if (cap & CEPH_CAP_RD) s += "r"; - if (cap & CEPH_CAP_WR) s += "w"; - if (cap & CEPH_CAP_WRBUFFER) s += "b"; - if (cap & CEPH_CAP_WRBUFFER) s += "a"; - if (cap & CEPH_CAP_LAZYIO) s += "l"; - if (cap & CEPH_CAP_EXCL) s += "x"; + + int a = (cap >> CEPH_CAP_SAUTH) & 3; + if (a) s += " a(" + gcap_string(a) + ")"; + + a = (cap >> CEPH_CAP_SLINK) & 3; + if (a) s += " l(" + gcap_string(a) + ")"; + + a = (cap >> CEPH_CAP_SXATTR) & 3; + if (a) s += " x(" + gcap_string(a) + ")"; + + a = cap >> 
CEPH_CAP_SFILE; + if (a) s += " f(" + gcap_string(a) + ")"; + return s; } diff --git a/src/messages/MClientCaps.h b/src/messages/MClientCaps.h index 94602a676178e..2bc3da20e01f4 100644 --- a/src/messages/MClientCaps.h +++ b/src/messages/MClientCaps.h @@ -98,8 +98,8 @@ class MClientCaps : public Message { out << "client_caps(" << ceph_cap_op_name(head.op) << " ino " << inodeno_t(head.ino) << " seq " << head.seq - << " caps=" << cap_string(head.caps) - << " wanted=" << cap_string(head.wanted) + << " caps=" << ccap_string(head.caps) + << " wanted=" << ccap_string(head.wanted) << " size " << head.size << "/" << head.max_size; if (head.truncate_seq) out << " ts " << head.truncate_seq; -- 2.39.5