From 004898a73ebe4b9e248d827795430ca0fa6d6934 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 21 Jan 2009 15:34:31 -0800 Subject: [PATCH] mds: unique (per mds) cap_id A new cap_id is issued each time an MDS creates a new capability, and is checked when releasing a capability to ensure it is the same capability instance. This specifically avoids a race like: 1- mds issues cap 2- mds sends cap message 3- mds and client time out cap 4- client receives cap message, sends release 5- mds reissues cap on same inode 6- mds receives release, doesn't realize the release is for the old capability Checking seq numbers isn't enough because the MDS may reissue the cap any number of times between 3 and 6. --- src/client/Client.cc | 11 +++-- src/client/Client.h | 3 +- src/include/ceph_fs.h | 5 +- src/kernel/caps.c | 31 ++++++++----- src/kernel/inode.c | 4 +- src/kernel/super.h | 3 +- src/mds/CInode.cc | 29 ++++++++++++ src/mds/CInode.h | 23 +-------- src/mds/Capability.h | 7 ++- src/mds/Locker.cc | 95 ++++++++++++++++++++------------------ src/mds/MDCache.cc | 4 +- src/mds/MDCache.h | 1 + src/mds/Migrator.cc | 2 +- src/mds/Server.cc | 6 ++- src/messages/MClientCaps.h | 7 ++- 15 files changed, 138 insertions(+), 93 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index d48638092bbaf..936fb5ae170aa 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -389,7 +389,7 @@ void Client::update_inode(Inode *in, InodeStat *st, utime_t from, int mds) if (st->cap.caps) { if (in->snapid == CEPH_NOSNAP) - add_update_cap(in, mds, st->cap.caps, st->cap.seq, st->cap.mseq, inodeno_t(st->cap.realm)); + add_update_cap(in, mds, st->cap.cap_id, st->cap.caps, st->cap.seq, st->cap.mseq, inodeno_t(st->cap.realm)); else { in->snap_caps |= st->cap.caps; } @@ -1474,7 +1474,7 @@ void Client::send_cap(Inode *in, int mds, InodeCap *cap, int used, int want, int MClientCaps *m = new MClientCaps(op, in->ino(), 0, - cap->seq, + cap->cap_id, cap->seq, cap->issued, want, cap->flushing, @@ -1681,7 +1681,7 @@ void Client::flush_snaps(Inode *in) << " on " << *in << dendl; if (p->second.dirty_data || p->second.writing) continue; - MClientCaps *m = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP, in->ino(), in->snaprealm->ino, mseq); + MClientCaps *m = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP, in->ino(), in->snaprealm->ino, 0, mseq); m->head.snap_follows = p->first; m->head.size = p->second.size; m->head.caps = p->second.issued; @@ -1764,7 +1764,7 @@ void Client::_flushed(Inode *in) * handle caps update from mds. including mds to mds caps transitions. * do not block. */ -void Client::add_update_cap(Inode *in, int mds, +void Client::add_update_cap(Inode *in, int mds, __u64 cap_id, unsigned issued, unsigned seq, unsigned mseq, inodeno_t realm) { InodeCap *cap = 0; @@ -1790,6 +1790,7 @@ void Client::add_update_cap(Inode *in, int mds, } unsigned old_caps = cap->issued; + cap->cap_id = cap_id; cap->issued |= issued; cap->implemented |= issued; cap->seq = seq; @@ -2097,7 +2098,7 @@ void Client::handle_cap_import(Inode *in, MClientCaps *m) // add/update it update_snap_trace(m->snapbl); - add_update_cap(in, mds, + add_update_cap(in, mds, m->get_cap_id(), m->get_caps(), m->get_seq(), m->get_mseq(), m->get_realm()); if (m->get_mseq() > in->exporting_mseq) { diff --git a/src/client/Client.h b/src/client/Client.h index af6be91239c15..c6bff9ac1151c 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -160,6 +160,7 @@ inline ostream& operator<<(ostream& out, const SnapRealm& r) { } struct InodeCap { + __u64 cap_id; unsigned issued; unsigned implemented; unsigned wanted; // as known to mds. @@ -829,7 +830,7 @@ protected: void release_lease(Inode *in, Dentry *dn, int mask); // file caps - void add_update_cap(Inode *in, int mds, + void add_update_cap(Inode *in, int mds, __u64 cap_id, unsigned issued, unsigned seq, unsigned mseq, inodeno_t realm); void remove_cap(Inode *in, int mds); void remove_all_caps(Inode *in); diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index fab4c82990fe6..5c6c456a1ab48 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -44,7 +44,7 @@ #define CEPH_MDS_PROTOCOL 5 /* cluster internal */ #define CEPH_MON_PROTOCOL 4 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 4 /* public/client */ -#define CEPH_MDSC_PROTOCOL 5 /* public/client */ +#define CEPH_MDSC_PROTOCOL 6 /* public/client */ #define CEPH_MONC_PROTOCOL 6 /* public/client */ @@ -827,6 +827,7 @@ struct ceph_frag_tree_head { struct ceph_mds_reply_cap { __le32 caps, wanted; + __le64 cap_id; __le32 seq, mseq; __le64 realm; __le32 ttl_ms; /* ttl, in ms. if readonly and unwanted. */ @@ -1031,6 +1032,7 @@ static inline const char *ceph_cap_op_name(int op) struct ceph_mds_caps { __le32 op; __le64 ino, realm; + __le64 cap_id; __le32 seq; __le32 caps, wanted, dirty; __le32 migrate_seq; @@ -1074,6 +1076,7 @@ struct ceph_mds_lease { /* client reconnect */ struct ceph_mds_cap_reconnect { + __le64 cap_id; __le32 wanted; __le32 issued; __le64 size; diff --git a/src/kernel/caps.c b/src/kernel/caps.c index 10de06ce6fbff..da1ae54dcb912 100644 --- a/src/kernel/caps.c +++ b/src/kernel/caps.c @@ -176,7 +176,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, * @fmode can be negative, in which case it is ignored. */ int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, + struct ceph_mds_session *session, u64 cap_id, int fmode, unsigned issued, unsigned wanted, unsigned seq, unsigned mseq, u64 realmino, unsigned ttl_ms, unsigned long ttl_from, @@ -188,8 +188,8 @@ int ceph_add_cap(struct inode *inode, int mds = session->s_mds; int is_first = 0; - dout(10, "add_cap on %p mds%d cap %s seq %d\n", inode, - session->s_mds, ceph_cap_string(issued), seq); + dout(10, "add_cap %p mds%d cap %llx %s seq %d\n", inode, + session->s_mds, cap_id, ceph_cap_string(issued), seq); retry: spin_lock(&inode->i_lock); cap = __get_cap_for_mds(inode, mds); @@ -260,6 +260,7 @@ retry: dout(10, "add_cap inode %p (%llx.%llx) cap %s now %s seq %d mds%d\n", inode, ceph_vinop(inode), ceph_cap_string(issued), ceph_cap_string(issued|cap->issued), seq, mds); + cap->cap_id = cap_id; cap->issued = issued; cap->implemented |= issued; cap->seq = seq; @@ -445,7 +446,7 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc, * * Caller should be holding s_mutex. */ -static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino, int op, +static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino, u64 cid, int op, int caps, int wanted, int dirty, u64 seq, u64 mseq, u64 size, u64 max_size, struct timespec *mtime, struct timespec *atime, @@ -456,9 +457,9 @@ static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino, int op, struct ceph_mds_caps *fc; struct ceph_msg *msg; - dout(10, "send_cap_msg %s %llx caps %s wanted %s dirty %s seq %llu/%llu" - " follows %lld size %llu\n", ceph_cap_op_name(op), ino, - ceph_cap_string(caps), ceph_cap_string(wanted), + dout(10, "send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" + " seq %llu/%llu follows %lld size %llu\n", ceph_cap_op_name(op), + cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted), ceph_cap_string(dirty), seq, mseq, follows, size); @@ -470,6 +471,7 @@ static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino, int op, memset(fc, 0, sizeof(*fc)); + fc->cap_id = cpu_to_le64(cid); fc->op = cpu_to_le32(op); fc->seq = cpu_to_le32(seq); fc->migrate_seq = cpu_to_le32(mseq); @@ -514,6 +516,7 @@ static void __send_cap(struct ceph_mds_client *mdsc, { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->vfs_inode; + u64 cap_id = cap->cap_id; int held = cap->issued | cap->implemented; int revoking = cap->implemented & ~cap->issued; int dropping = cap->issued & ~retain; @@ -528,7 +531,7 @@ static void __send_cap(struct ceph_mds_client *mdsc, gid_t gid; int dirty; int flushing; - int last_cap = 0; + int last_cap = 0; dout(10, "__send_cap cap %p session %p %s -> %s (revoking %s)\n", cap, cap->session, @@ -587,7 +590,7 @@ static void __send_cap(struct ceph_mds_client *mdsc, invalidate_mapping_pages(&inode->i_data, 0, -1); } - send_cap_msg(mdsc, ceph_vino(inode).ino, + send_cap_msg(mdsc, ceph_vino(inode).ino, cap_id, op, keep, want, flushing, seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, @@ -674,7 +677,7 @@ retry: dout(10, "flush_snaps %p cap_snap %p follows %lld size %llu\n", inode, capsnap, next_follows, capsnap->size); - send_cap_msg(mdsc, ceph_vino(inode).ino, + send_cap_msg(mdsc, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, capsnap->dirty, 0, mseq, capsnap->size, 0, @@ -1543,6 +1546,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, unsigned mseq = le32_to_cpu(im->migrate_seq); u64 realmino = le64_to_cpu(im->realm); unsigned long ttl_ms = le32_to_cpu(im->ttl_ms); + u64 cap_id = le64_to_cpu(im->cap_id); if (ci->i_cap_exporting_mds >= 0 && ceph_seq_cmp(ci->i_cap_exporting_mseq, mseq) < 0) { @@ -1562,7 +1566,8 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, false); downgrade_write(&mdsc->snap_rwsem); - ceph_add_cap(inode, session, -1, issued, wanted, seq, mseq, realmino, + ceph_add_cap(inode, session, cap_id, -1, + issued, wanted, seq, mseq, realmino, ttl_ms, jiffies - ttl_ms/2, NULL); up_read(&mdsc->snap_rwsem); } @@ -1586,6 +1591,7 @@ void ceph_handle_caps(struct ceph_mds_client *mdsc, int op; u32 seq; struct ceph_vino vino; + u64 cap_id; u64 size, max_size; int check_caps = 0; void *xattr_data = NULL; @@ -1600,6 +1606,7 @@ void ceph_handle_caps(struct ceph_mds_client *mdsc, op = le32_to_cpu(h->op); vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; + cap_id = le64_to_cpu(h->cap_id); seq = le32_to_cpu(h->seq); size = le64_to_cpu(h->size); max_size = le64_to_cpu(h->max_size); @@ -1706,7 +1713,7 @@ bad: return; release: - send_cap_msg(mdsc, vino.ino, CEPH_CAP_OP_RELEASE, + send_cap_msg(mdsc, vino.ino, cap_id, CEPH_CAP_OP_RELEASE, 0, 0, 0, seq, 0, size, 0, NULL, NULL, 0, diff --git a/src/kernel/inode.c b/src/kernel/inode.c index 6eaea34cf91ea..e302f709d4348 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -504,7 +504,9 @@ no_change: /* were we issued a capability? */ if (info->cap.caps) { if (ceph_snap(inode) == CEPH_NOSNAP) { - ceph_add_cap(inode, session, cap_fmode, + ceph_add_cap(inode, session, + le64_to_cpu(info->cap.cap_id), + cap_fmode, le32_to_cpu(info->cap.caps), le32_to_cpu(info->cap.wanted), le32_to_cpu(info->cap.seq), diff --git a/src/kernel/super.h b/src/kernel/super.h index d4ca6e7c1de0d..9f043e0fd3b5a 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -134,6 +134,7 @@ struct ceph_cap { struct list_head session_caps; /* per-session caplist */ struct list_head session_rdcaps; /* per-session rdonly caps */ int mds; + u64 cap_id; int issued; /* latest, from the mds */ int implemented; /* what we've implemented (for tracking revocation) */ int flushing; /* dirty fields being written back to mds */ @@ -701,7 +702,7 @@ extern const char *ceph_cap_string(int c); extern void ceph_handle_caps(struct ceph_mds_client *mdsc, struct ceph_msg *msg); extern int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, + struct ceph_mds_session *session, u64 cap_id, int fmode, unsigned issued, unsigned wanted, unsigned cap, unsigned seq, u64 realmino, unsigned ttl_ms, unsigned long ttl_from, diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 785eb29923970..34aebd7aa79a1 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -493,6 +493,34 @@ void CInode::name_stray_dentry(string& dname) } +Capability *CInode::add_client_cap(int client, Session *session, + xlist *rdcaps_list, SnapRealm *conrealm) +{ + if (client_caps.empty()) { + get(PIN_CAPS); + if (conrealm) + containing_realm = conrealm; + else + containing_realm = find_snaprealm(); + containing_realm->inodes_with_caps.push_back(&xlist_caps); + } + + assert(client_caps.count(client) == 0); + Capability *cap = client_caps[client] = new Capability(this, ++mdcache->last_cap_id, client, rdcaps_list); + if (session) + session->add_cap(cap); + + cap->client_follows = first-1; + + containing_realm->add_cap(client, cap); + + return cap; +} + + + + + version_t CInode::pre_dirty() { assert(parent || projected_parent.size()); @@ -1422,6 +1450,7 @@ bool CInode::encode_inodestat(bufferlist& bl, Session *session, cap->touch(); // move to back of session cap LRU e.cap.caps = issue; e.cap.wanted = cap->wanted(); + e.cap.cap_id = cap->get_cap_id(); e.cap.seq = cap->get_last_seq(); dout(10) << "encode_inodestat issueing " << ccap_string(issue) << " seq " << cap->get_last_seq() << dendl; e.cap.mseq = cap->get_mseq(); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index c8feae4b8dff0..8161d01775b71 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -548,27 +548,8 @@ public: if (c) return c->pending(); return 0; } - Capability *add_client_cap(int client, Session *session, xlist *rdcaps_list, SnapRealm *conrealm=0) { - if (client_caps.empty()) { - get(PIN_CAPS); - if (conrealm) - containing_realm = conrealm; - else - containing_realm = find_snaprealm(); - containing_realm->inodes_with_caps.push_back(&xlist_caps); - } - - assert(client_caps.count(client) == 0); - Capability *cap = client_caps[client] = new Capability(this, client, rdcaps_list); - if (session) - session->add_cap(cap); - - cap->client_follows = first-1; - - containing_realm->add_cap(client, cap); - - return cap; - } + Capability *add_client_cap(int client, Session *session, + xlist *rdcaps_list, SnapRealm *conrealm=0); void remove_client_cap(int client) { assert(client_caps.count(client) == 1); Capability *cap = client_caps[client]; diff --git a/src/mds/Capability.h b/src/mds/Capability.h index f8c9a9571440c..dd60740a70d41 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -98,6 +98,8 @@ private: CInode *inode; int client; + __u64 cap_id; + __u32 _wanted; // what the client wants (ideally) utime_t last_issue_stamp; @@ -216,8 +218,9 @@ public: xlist::item snaprealm_caps_item; - Capability(CInode *i, int c, xlist *rl) : + Capability(CInode *i, __u64 id, int c, xlist *rl) : inode(i), client(c), + cap_id(id), _wanted(0), _pending(0), _issued(0), _num_revoke(0), last_sent(0), @@ -237,6 +240,8 @@ public: void set_last_issue_stamp(utime_t t) { last_issue_stamp = t; } + __u64 get_cap_id() { return cap_id; } + //ceph_seq_t get_last_issue() { return last_issue; } bool is_suppress() { return suppress > 0; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index b3bea2d9ab295..d4b0496c7101d 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1031,7 +1031,7 @@ bool Locker::issue_caps(CInode *in) MClientCaps *m = new MClientCaps(CEPH_CAP_OP_GRANT, in->ino(), in->find_snaprealm()->inode->ino(), - cap->get_last_seq(), + cap->get_cap_id(), cap->get_last_seq(), after, wanted, 0, cap->get_mseq()); in->encode_cap_message(m, cap); @@ -1064,7 +1064,7 @@ void Locker::issue_truncate(CInode *in) MClientCaps *m = new MClientCaps(CEPH_CAP_OP_TRUNC, in->ino(), in->find_snaprealm()->inode->ino(), - cap->get_last_seq(), + cap->get_cap_id(), cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq()); in->encode_cap_message(m, cap); @@ -1355,7 +1355,7 @@ void Locker::share_inode_max_size(CInode *in) MClientCaps *m = new MClientCaps(CEPH_CAP_OP_GRANT, in->ino(), in->find_snaprealm()->inode->ino(), - cap->get_last_seq(), + cap->get_cap_id(), cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq()); in->encode_cap_message(m, cap); @@ -1443,7 +1443,7 @@ void Locker::handle_client_caps(MClientCaps *m) // case we get a dup response, so whatever.) MClientCaps *ack = 0; if (m->get_dirty()) { - ack = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, m->get_dirty(), 0); + ack = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, m->get_dirty(), 0); ack->set_snap_follows(follows); } if (!_do_cap_update(in, cap, m->get_dirty(), 0, follows, m, ack)) { @@ -1459,52 +1459,55 @@ void Locker::handle_client_caps(MClientCaps *m) // for this and all subsequent versions of this inode, while (1) { - // filter wanted based on what we could ever give out (given auth/replica status) - int wanted = m->get_wanted() & head_in->get_caps_allowed_ever(); - cap->confirm_receipt(m->get_seq(), m->get_caps()); - dout(10) << " follows " << follows - << " retains " << ccap_string(m->get_caps()) - << " dirty " << ccap_string(m->get_caps()) - << " on " << *in << dendl; - - MClientCaps *ack = 0; - ceph_seq_t releasecap = 0; - - if (m->get_dirty() && in->is_auth()) { - dout(7) << " flush client" << client << " dirty " << ccap_string(m->get_dirty()) - << " seq " << m->get_seq() << " on " << *in << dendl; - ack = new MClientCaps(CEPH_CAP_OP_FLUSH_ACK, in->ino(), 0, m->get_seq(), - m->get_caps(), 0, m->get_dirty(), 0); - } - if (m->get_caps() == 0) { + if (cap->get_cap_id() != m->get_cap_id()) { + dout(7) << " ignoring client capid " << m->get_cap_id() << " != my " << cap->get_cap_id() << dendl; + } else { + // filter wanted based on what we could ever give out (given auth/replica status) + int wanted = m->get_wanted() & head_in->get_caps_allowed_ever(); + cap->confirm_receipt(m->get_seq(), m->get_caps()); + dout(10) << " follows " << follows + << " retains " << ccap_string(m->get_caps()) + << " dirty " << ccap_string(m->get_caps()) + << " on " << *in << dendl; - assert(ceph_seq_cmp(m->get_seq(), cap->get_last_sent()) <= 0); - if (m->get_seq() == cap->get_last_sent()) { - dout(7) << " releasing request client" << client << " seq " << m->get_seq() << " on " << *in << dendl; - releasecap = m->get_seq(); - } else { - dout(7) << " NOT releasing request client" << client << " seq " << m->get_seq() - << " < last_sent " << cap->get_last_sent() << " on " << *in << dendl; + MClientCaps *ack = 0; + ceph_seq_t releasecap = 0; + + if (m->get_dirty() && in->is_auth()) { + dout(7) << " flush client" << client << " dirty " << ccap_string(m->get_dirty()) + << " seq " << m->get_seq() << " on " << *in << dendl; + ack = new MClientCaps(CEPH_CAP_OP_FLUSH_ACK, in->ino(), 0, cap->get_cap_id(), m->get_seq(), + m->get_caps(), 0, m->get_dirty(), 0); + } + if (m->get_caps() == 0) { + assert(ceph_seq_cmp(m->get_seq(), cap->get_last_sent()) <= 0); + if (m->get_seq() == cap->get_last_sent()) { + dout(7) << " releasing request client" << client << " seq " << m->get_seq() << " on " << *in << dendl; + releasecap = m->get_seq(); + } else { + dout(7) << " NOT releasing request client" << client << " seq " << m->get_seq() + << " < last_sent " << cap->get_last_sent() << " on " << *in << dendl; + } + } + if (wanted != cap->wanted()) { + dout(10) << " wanted " << ccap_string(cap->wanted()) + << " -> " << ccap_string(wanted) << dendl; + cap->set_wanted(wanted); } - } - if (wanted != cap->wanted()) { - dout(10) << " wanted " << ccap_string(cap->wanted()) - << " -> " << ccap_string(wanted) << dendl; - cap->set_wanted(wanted); - } - - if (!_do_cap_update(in, cap, m->get_dirty(), m->get_wanted(), follows, m, ack, releasecap)) { - // no update, ack now. - if (releasecap) - _finish_release_cap(in, client, releasecap, ack); - else if (ack) - mds->send_message_client(ack, client); - eval_cap_gather(in); - if (in->filelock.is_stable()) - file_eval(&in->filelock); + if (!_do_cap_update(in, cap, m->get_dirty(), m->get_wanted(), follows, m, ack, releasecap)) { + // no update, ack now. + if (releasecap) + _finish_release_cap(in, client, releasecap, ack); + else if (ack) + mds->send_message_client(ack, client); + + eval_cap_gather(in); + if (in->filelock.is_stable()) + file_eval(&in->filelock); + } } - + // done? if (in->last == CEPH_NOSNAP) break; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 356b331bcb5b0..6b133862763df 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -106,6 +106,8 @@ MDCache::MDCache(MDS *m) did_shutdown_log_cap = false; + last_cap_id = 0; + client_lease_durations[0] = 5.0; client_lease_durations[1] = 30.0; client_lease_durations[2] = 300.0; @@ -3659,7 +3661,7 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap) MClientCaps *reap = new MClientCaps(CEPH_CAP_OP_IMPORT, in->ino(), realm->inode->ino(), - cap->get_last_seq(), + cap->get_cap_id(), cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq()); in->encode_cap_message(reap, cap); diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 2012f0b82653c..956da55c62e9b 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -416,6 +416,7 @@ public: void trim_client_leases(); // -- client caps -- + __u64 last_cap_id; xlist client_rdcaps; void trim_client_rdcaps(); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index bf93b0d6bb47c..0cef1c7002d85 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -906,7 +906,7 @@ void Migrator::finish_export_inode_caps(CInode *in) MClientCaps *m = new MClientCaps(CEPH_CAP_OP_EXPORT, in->ino(), in->find_snaprealm()->inode->ino(), - cap->get_last_seq(), + cap->get_cap_id(), cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq()); mds->send_message_client(m, it->first); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 05c94e1849826..da7cdec1984cb 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -428,6 +428,10 @@ void Server::handle_client_reconnect(MClientReconnect *m) for (map::iterator p = m->caps.begin(); p != m->caps.end(); ++p) { + // make sure our last_cap_id is MAX over all issued caps + if (p->second.capinfo.cap_id > mdcache->last_cap_id) + mdcache->last_cap_id = p->second.capinfo.cap_id; + CInode *in = mdcache->get_inode(p->first); if (in && in->is_auth()) { // we recovered it, and it's ours. take note. @@ -449,7 +453,7 @@ void Server::handle_client_reconnect(MClientReconnect *m) inode_t fake_inode; memset(&fake_inode, 0, sizeof(fake_inode)); fake_inode.ino = p->first; - MClientCaps *stale = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0, 0); + MClientCaps *stale = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0, 0, 0); //stale->head.migrate_seq = 0; // FIXME ****** mds->send_message_client(stale, m->get_source_inst()); diff --git a/src/messages/MClientCaps.h b/src/messages/MClientCaps.h index 955c25e3fab4e..974f559485887 100644 --- a/src/messages/MClientCaps.h +++ b/src/messages/MClientCaps.h @@ -32,6 +32,7 @@ class MClientCaps : public Message { inodeno_t get_ino() { return inodeno_t(head.ino); } inodeno_t get_realm() { return inodeno_t(head.realm); } + __u64 get_cap_id() { return head.cap_id; } __u64 get_size() { return head.size; } __u64 get_max_size() { return head.max_size; } @@ -65,6 +66,7 @@ class MClientCaps : public Message { MClientCaps(int op, inodeno_t ino, inodeno_t realm, + __u64 id, long seq, int caps, int wanted, @@ -75,6 +77,7 @@ class MClientCaps : public Message { head.op = op; head.ino = ino; head.realm = realm; + head.cap_id = id; head.seq = seq; head.caps = caps; head.wanted = wanted; @@ -83,12 +86,13 @@ class MClientCaps : public Message { } MClientCaps(int op, inodeno_t ino, inodeno_t realm, - int mseq) : + __u64 id, int mseq) : Message(CEPH_MSG_CLIENT_CAPS) { memset(&head, 0, sizeof(head)); head.op = op; head.ino = ino; head.realm = realm; + head.cap_id = id; head.migrate_seq = mseq; } @@ -96,6 +100,7 @@ class MClientCaps : public Message { void print(ostream& out) { out << "client_caps(" << ceph_cap_op_name(head.op) << " ino " << inodeno_t(head.ino) + << " " << head.cap_id << " seq " << head.seq << " caps=" << ccap_string(head.caps) << " dirty=" << ccap_string(head.dirty) -- 2.39.5