From 886091eff1e561f7e92a6fec1f61f7cd5f6af63b Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Sun, 4 Jan 2009 15:41:03 -0800
Subject: [PATCH] kclient: ref count cap_snap to avoid unnecessarily copying into temp variables

---
 src/TODO           |  3 +++
 src/kernel/caps.c  | 44 +++++++++++++++++++++++---------------------
 src/kernel/snap.c  | 10 ++++++++++
 src/kernel/super.h | 18 +++++++++++++++++-
 4 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/src/TODO b/src/TODO
index 15240a73b3fe7..4fe87604f78a7 100644
--- a/src/TODO
+++ b/src/TODO
@@ -50,6 +50,9 @@ caps
 - client should not get unsolicited MClientCaps if wanted==0.
 - if we do un-acked cap release, we need to handle unsolicited import/export
   - may unacked release _only_ if wanted==0?
+- maybe we really want a 'dirty' mask
+  - client knows when it has dirty data to writeback, and thus when it must wait for it to flush?
+
 kclient
 - only pin caps with wanted != 0?
   - put unwanted caps on an lru list; expire
diff --git a/src/kernel/caps.c b/src/kernel/caps.c
index 1f1f0535396a0..b3bcbd26dbc7c 100644
--- a/src/kernel/caps.c
+++ b/src/kernel/caps.c
@@ -307,7 +307,9 @@ static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino,
 			 int op, int caps, int wanted, u64 seq, u64 mseq,
 			 u64 size, u64 max_size,
 			 struct timespec *mtime, struct timespec *atime,
-			 u64 time_warp_seq, u64 follows, int mds)
+			 u64 time_warp_seq,
+			 uid_t uid, gid_t gid, mode_t mode,
+			 u64 follows, int mds)
 {
 	struct ceph_mds_caps *fc;
 	struct ceph_msg *msg;
@@ -369,6 +371,9 @@ static void __send_cap(struct ceph_mds_client *mdsc,
 	struct timespec mtime, atime;
 	int wake = 0;
 	int op = CEPH_CAP_OP_ACK;
+	mode_t mode;
+	uid_t uid;
+	gid_t gid;
 
 	if (wanted == 0)
 		op = CEPH_CAP_OP_RELEASE;
@@ -399,6 +404,9 @@ static void __send_cap(struct ceph_mds_client *mdsc,
 	atime = inode->i_atime;
 	time_warp_seq = ci->i_time_warp_seq;
 	follows = ci->i_snap_realm->cached_context->seq;
+	uid = inode->i_uid;
+	gid = inode->i_gid;
+	mode = inode->i_mode;
 	spin_unlock(&inode->i_lock);
 
 	if (dropping & CEPH_CAP_FILE_RDCACHE) {
@@ -410,6 +418,7 @@ static void __send_cap(struct ceph_mds_client *mdsc,
 	send_cap_msg(mdsc, ceph_vino(inode).ino,
 		     op, keep, wanted, seq, mseq, size, max_size,
 		     &mtime, &atime, time_warp_seq,
+		     uid, gid, mode,
 		     follows, session->s_mds);
 
 	if (wake)
@@ -433,11 +442,6 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
 	int mds;
 	struct list_head *p;
 	struct ceph_cap_snap *capsnap;
-	u64 follows;
-	int issued;
-	u64 size;
-	struct timespec mtime, atime, ctime;
-	u64 time_warp_seq;
 	u32 mseq;
 	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
@@ -491,25 +495,23 @@ retry:
 			goto retry;
 		}
 
-		follows = capsnap->follows;
-		size = capsnap->size;
-		atime = capsnap->atime;
-		mtime = capsnap->mtime;
-		ctime = capsnap->ctime;
-		time_warp_seq = capsnap->time_warp_seq;
-		issued = capsnap->issued;
+		atomic_inc(&capsnap->nref);
 		spin_unlock(&inode->i_lock);
 
 		dout(10, "flush_snaps %p cap_snap %p follows %lld size %llu\n",
-		     inode, capsnap, next_follows, size);
+		     inode, capsnap, next_follows, capsnap->size);
 		send_cap_msg(mdsc, ceph_vino(inode).ino,
-			     CEPH_CAP_OP_FLUSHSNAP, issued, 0, 0, mseq,
-			     size, 0,
-			     &mtime, &atime, time_warp_seq,
-			     follows, mds);
+			     CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 0, mseq,
+			     capsnap->size, 0,
+			     &capsnap->mtime, &capsnap->atime,
+			     capsnap->time_warp_seq,
+			     capsnap->uid, capsnap->gid, capsnap->mode,
+			     capsnap->follows, mds);
+
+		next_follows = capsnap->follows + 1;
+		ceph_put_cap_snap(capsnap);
 
 		spin_lock(&inode->i_lock);
-		next_follows = follows + 1;
 		goto retry;
 	}
 
@@ -1155,7 +1157,7 @@ static void handle_cap_flushedsnap(struct inode *inode,
 			     capsnap, follows);
 			ceph_put_snap_context(capsnap->context);
 			list_del(&capsnap->ci_item);
-			kfree(capsnap);
+			ceph_put_cap_snap(capsnap);
 			drop = 1;
 			break;
 		} else {
@@ -1365,7 +1367,7 @@ void ceph_handle_caps(struct ceph_mds_client *mdsc,
 	if (!inode) {
 		dout(10, " i don't have ino %llx, sending release\n", vino.ino);
 		send_cap_msg(mdsc, vino.ino, CEPH_CAP_OP_RELEASE, 0, 0, seq,
-			     size, 0, 0, NULL, NULL, 0, 0, mds);
+			     size, 0, 0, NULL, NULL, 0, 0, 0, 0, 0, mds);
 		goto no_inode;
 	}
 
diff --git a/src/kernel/snap.c b/src/kernel/snap.c
index 87afc6428368c..8d8080f886c0c 100644
--- a/src/kernel/snap.c
+++ b/src/kernel/snap.c
@@ -310,6 +310,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 		derr(10, "ENOMEM allocating ceph_cap_snap on %p\n", inode);
 		return;
 	}
+	atomic_set(&capsnap->nref, 1);
 
 	spin_lock(&inode->i_lock);
 	used = __ceph_caps_used(ci);
@@ -326,6 +327,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 	capsnap->follows = snapc->seq - 1;
 	capsnap->context = ceph_get_snap_context(snapc);
 	capsnap->issued = __ceph_caps_issued(ci, NULL);
+
+	capsnap->mode = inode->i_mode;
+	capsnap->uid = inode->i_uid;
+	capsnap->gid = inode->i_gid;
+
+	/* fixme? */
+	capsnap->xattr_blob = 0;
+	capsnap->xattr_len = 0;
+
 	/* dirty page count moved from _head to this cap_snap;
 	   all subsequent writes page dirties occur _after_ this
 	   snapshot. */
diff --git a/src/kernel/super.h b/src/kernel/super.h
index 82b1efa8db652..91db5b7bae736 100644
--- a/src/kernel/super.h
+++ b/src/kernel/super.h
@@ -144,17 +144,33 @@ struct ceph_cap {
  * data before flushing the snapped state (tracked here) back to the MDS.
  */
 struct ceph_cap_snap {
+	atomic_t nref;
+
 	struct list_head ci_item;
 	u64 follows;
 	int issued;
+	struct ceph_snap_context *context;
+
+	mode_t mode;
+	uid_t uid;
+	gid_t gid;
+
+	void *xattr_blob;
+	int xattr_len;
+
 	u64 size;
 	struct timespec mtime, atime, ctime;
 	u64 time_warp_seq;
-	struct ceph_snap_context *context;
 	int writing;  /* a sync write is still in progress */
 	int dirty;    /* dirty pages awaiting writeback */
 };
 
+static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
+{
+	if (atomic_dec_and_test(&capsnap->nref))
+		kfree(capsnap);
+}
+
 /*
  * The frag tree describes how a directory is fragmented, potentially across
  * multiple metadata servers. It is also used to indicate points where
-- 
2.39.5
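
Editor's note, not part of the patch: the lifecycle the patch sets up is that ceph_queue_cap_snap() creates the cap_snap with nref = 1 for the i_cap_snaps list, __ceph_flush_snaps() takes an extra reference before dropping i_lock so send_cap_msg() can read the struct's fields directly, and handle_cap_flushedsnap() drops the list's reference when the MDS acks the flush; whichever put hits zero does the kfree(). Below is a minimal userspace sketch of that pattern, using C11 atomics in place of the kernel's atomic_t. The names cap_snap_alloc/cap_snap_get/cap_snap_put and the main() flow are illustrative only, not code from the ceph tree.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-in for struct ceph_cap_snap: the refcount plus a couple of
 * the fields the flush path reads after dropping the inode lock. */
struct cap_snap {
	atomic_int nref;
	unsigned long long follows;
	unsigned long long size;
};

static struct cap_snap *cap_snap_alloc(unsigned long long follows)
{
	struct cap_snap *cs = malloc(sizeof(*cs));

	if (!cs)
		return NULL;
	atomic_init(&cs->nref, 1);	/* the snap list's reference */
	cs->follows = follows;
	cs->size = 0;
	return cs;
}

static void cap_snap_get(struct cap_snap *cs)
{
	atomic_fetch_add(&cs->nref, 1);	/* like atomic_inc(&capsnap->nref) */
}

static void cap_snap_put(struct cap_snap *cs)
{
	/* Free on the 1 -> 0 transition, mirroring the patch's
	 * ceph_put_cap_snap(): atomic_dec_and_test() then kfree(). */
	if (atomic_fetch_sub(&cs->nref, 1) == 1) {
		printf("last ref dropped, freeing cap_snap follows %llu\n",
		       cs->follows);
		free(cs);
	}
}

int main(void)
{
	struct cap_snap *cs = cap_snap_alloc(10);

	if (!cs)
		return 1;
	cap_snap_get(cs);	/* flush path: pin it while the lock is held */
	/* ...lock dropped; cs->follows, cs->size remain valid here... */
	cap_snap_put(cs);	/* flush path is done with it */
	cap_snap_put(cs);	/* FLUSHSNAP ack removes it from the list: frees */
	return 0;
}

This is also why the commit can delete the temp variables: once a reference pins the struct across the unlocked send_cap_msg() call, the flush path no longer needs to copy follows, size, mtime, atime, and friends into locals before releasing i_lock.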