From 1961fb75ebe5fce2eabe3295ff8d12b667d0912c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2009 15:41:23 -0700 Subject: [PATCH] kclient: include tid in cap flush, flushsnap This avoids (dirty 1, flush 1, dirty 2, flush 2, flush_ack 1 -> mark clean) badness. --- src/kernel/caps.c | 68 +++++++++++++++++++++++++++-------------- src/kernel/mds_client.c | 1 + src/kernel/mds_client.h | 1 + src/kernel/super.h | 4 +-- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/src/kernel/caps.c b/src/kernel/caps.c index 5a4f2128acb9..18cec151776e 100644 --- a/src/kernel/caps.c +++ b/src/kernel/caps.c @@ -790,7 +790,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, */ static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino, u64 cid, int op, int caps, int wanted, int dirty, - u32 seq, u32 issue_seq, u32 mseq, + u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq, u64 size, u64 max_size, struct timespec *mtime, struct timespec *atime, u64 time_warp_seq, @@ -822,6 +822,7 @@ static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino, u64 cid, int op, fc->cap_id = cpu_to_le64(cid); fc->op = cpu_to_le32(op); fc->seq = cpu_to_le32(seq); + fc->client_tid = cpu_to_le64(flush_tid); fc->issue_seq = cpu_to_le32(issue_seq); fc->migrate_seq = cpu_to_le32(mseq); fc->caps = cpu_to_le32(caps); @@ -944,6 +945,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, int xattrs_blob_size = 0; u64 xattr_version = 0; int delayed = 0; + u64 flush_tid = 0; dout(10, "__send_cap %p cap %p session %p %s -> %s (revoking %s)\n", inode, cap, cap->session, @@ -978,6 +980,12 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, cap->implemented &= cap->issued | used; cap->mds_wanted = want; + if (flushing) { + flush_tid = ++cap->session->s_cap_flush_tid; + ci->i_cap_flush_tid = flush_tid; + dout(10, " cap_flush_tid %lld\n", flush_tid); + } + keep = cap->implemented; seq = cap->seq; issue_seq = cap->issue_seq; @@ -1010,7 +1018,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, } send_cap_msg(mdsc, ceph_vino(inode).ino, cap_id, - op, keep, want, flushing, seq, issue_seq, mseq, + op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, @@ -1091,6 +1099,7 @@ retry: goto retry; } + capsnap->flush_tid = ++session->s_cap_flush_tid; atomic_inc(&capsnap->nref); spin_unlock(&inode->i_lock); @@ -1098,7 +1107,7 @@ retry: inode, capsnap, next_follows, capsnap->size); send_cap_msg(mdsc, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, - capsnap->dirty, 0, 0, mseq, + capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, capsnap->size, 0, &capsnap->mtime, &capsnap->atime, capsnap->time_warp_seq, @@ -2071,6 +2080,7 @@ static void handle_cap_flush_ack(struct inode *inode, struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; unsigned seq = le32_to_cpu(m->seq); int cleaned = le32_to_cpu(m->dirty); + u64 flush_tid = le64_to_cpu(m->client_tid); int old_dirty, new_dirty; dout(10, "handle_cap_flush_ack inode %p mds%d seq %d cleaned %s," @@ -2078,27 +2088,32 @@ static void handle_cap_flush_ack(struct inode *inode, inode, session->s_mds, seq, ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(ci->i_flushing_caps & ~cleaned)); - old_dirty = ci->i_dirty_caps | ci->i_flushing_caps; - ci->i_flushing_caps &= ~cleaned; - new_dirty = ci->i_dirty_caps | ci->i_flushing_caps; - if (old_dirty) { - spin_lock(&mdsc->cap_dirty_lock); - list_del_init(&ci->i_flushing_item); - if (!list_empty(&session->s_cap_flushing)) - dout(20, " mds%d still flushing cap on %p\n", - session->s_mds, - &list_entry(session->s_cap_flushing.next, - struct ceph_inode_info, - i_flushing_item)->vfs_inode); - mdsc->num_cap_flushing--; - wake_up(&mdsc->cap_flushing_wq); - dout(20, " inode %p now !flushing\n", inode); - if (!new_dirty) { - dout(20, " inode %p now clean\n", inode); - list_del_init(&ci->i_dirty_item); + if (flush_tid != ci->i_cap_flush_tid) { + dout(10, " flush_tid %lld != my flush_tid %lld, ignoring\n", + flush_tid, ci->i_cap_flush_tid); + } else { + old_dirty = ci->i_dirty_caps | ci->i_flushing_caps; + ci->i_flushing_caps &= ~cleaned; + new_dirty = ci->i_dirty_caps | ci->i_flushing_caps; + if (old_dirty) { + spin_lock(&mdsc->cap_dirty_lock); + list_del_init(&ci->i_flushing_item); + if (!list_empty(&session->s_cap_flushing)) + dout(20, " mds%d still flushing cap on %p\n", + session->s_mds, + &list_entry(session->s_cap_flushing.next, + struct ceph_inode_info, + i_flushing_item)->vfs_inode); + mdsc->num_cap_flushing--; + wake_up(&mdsc->cap_flushing_wq); + dout(20, " inode %p now !flushing\n", inode); + if (!new_dirty) { + dout(20, " inode %p now clean\n", inode); + list_del_init(&ci->i_dirty_item); + } + spin_unlock(&mdsc->cap_dirty_lock); + wake_up(&ci->i_cap_wq); } - spin_unlock(&mdsc->cap_dirty_lock); - wake_up(&ci->i_cap_wq); } spin_unlock(&inode->i_lock); @@ -2118,6 +2133,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, { struct ceph_inode_info *ci = ceph_inode(inode); u64 follows = le64_to_cpu(m->snap_follows); + u64 flush_tid = le64_to_cpu(m->client_tid); struct ceph_cap_snap *capsnap; int drop = 0; @@ -2127,6 +2143,12 @@ static void handle_cap_flushsnap_ack(struct inode *inode, spin_lock(&inode->i_lock); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { if (capsnap->follows == follows) { + if (capsnap->flush_tid != flush_tid) { + dout(10, " cap_snap %p follows %lld tid %lld !=" + " %lld\n", capsnap, follows, + flush_tid, capsnap->flush_tid); + break; + } WARN_ON(capsnap->dirty_pages || capsnap->writing); dout(10, " removing cap_snap %p follows %lld\n", capsnap, follows); diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index 648fbcaab000..e5e8b753b417 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -308,6 +308,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_cap_releases); INIT_LIST_HEAD(&s->s_cap_releases_done); INIT_LIST_HEAD(&s->s_cap_flushing); + s->s_cap_flush_tid = 0; dout(10, "register_session mds%d\n", mds); if (mds >= mdsc->max_sessions) { diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 934ec412fa60..b7f78a6bd1a5 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -130,6 +130,7 @@ struct ceph_mds_session { struct list_head s_cap_releases_done; /* ready to send */ struct list_head s_cap_flushing; /* inodes w/ flushing caps */ + u64 s_cap_flush_tid; }; /* diff --git a/src/kernel/super.h b/src/kernel/super.h index e5e8b2d3f7cf..d8b2ae73a388 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -187,7 +187,7 @@ struct ceph_cap_snap { atomic_t nref; struct list_head ci_item; - u64 follows; + u64 follows, flush_tid; int issued, dirty; struct ceph_snap_context *context; @@ -308,7 +308,7 @@ struct ceph_inode_info { struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */ struct list_head i_dirty_item, i_flushing_item; - u64 i_cap_flush_seq; + u64 i_cap_flush_seq, i_cap_flush_tid; wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ unsigned long i_hold_caps_min; /* jiffies */ unsigned long i_hold_caps_max; /* jiffies */ -- 2.47.3