git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: ref count cap_snap to avoid unnecessarily copying into temp variables
author Sage Weil <sage@newdream.net>
Sun, 4 Jan 2009 23:41:03 +0000 (15:41 -0800)
committer Sage Weil <sage@newdream.net>
Sun, 4 Jan 2009 23:41:03 +0000 (15:41 -0800)
src/TODO
src/kernel/caps.c
src/kernel/snap.c
src/kernel/super.h

index 15240a73b3fe75a10a60d0691dfc5e3aa71ac977..4fe87604f78a79d0966cbc58028dc5bbcbb745ea 100644 (file)
--- a/src/TODO
+++ b/src/TODO
@@ -50,6 +50,9 @@ caps
   - client should not get unsolicited MClientCaps if wanted==0.
   - if we do un-acked cap release, we need to handle unsolicited import/export
   - may unacked release _only_ if wanted==0?
+- maybe we really want a 'dirty' mask
+  - client knows when it has dirty data to writeback, and thus when it must wait for it to flush?
+
 - kclient
   - only pin caps with wanted != 0?
   - put unwanted caps on an lru list; expire
index 1f1f0535396a02e4479947be29c5f8202a4d472d..b3bcbd26dbc7c078ff3d95701110c6e2adc6ee7f 100644 (file)
@@ -307,7 +307,9 @@ static void send_cap_msg(struct ceph_mds_client *mdsc, u64 ino, int op,
                         int caps, int wanted, u64 seq, u64 mseq,
                         u64 size, u64 max_size,
                         struct timespec *mtime, struct timespec *atime,
-                        u64 time_warp_seq, u64 follows, int mds)
+                        u64 time_warp_seq,
+                        uid_t uid, gid_t gid, mode_t mode,
+                        u64 follows, int mds)
 {
        struct ceph_mds_caps *fc;
        struct ceph_msg *msg;
@@ -369,6 +371,9 @@ static void __send_cap(struct ceph_mds_client *mdsc,
        struct timespec mtime, atime;
        int wake = 0;
        int op = CEPH_CAP_OP_ACK;
+       mode_t mode;
+       uid_t uid;
+       gid_t gid;
 
        if (wanted == 0)
                op = CEPH_CAP_OP_RELEASE;
@@ -399,6 +404,9 @@ static void __send_cap(struct ceph_mds_client *mdsc,
        atime = inode->i_atime;
        time_warp_seq = ci->i_time_warp_seq;
        follows = ci->i_snap_realm->cached_context->seq;
+       uid = inode->i_uid;
+       gid = inode->i_gid;
+       mode = inode->i_mode;
        spin_unlock(&inode->i_lock);
 
        if (dropping & CEPH_CAP_FILE_RDCACHE) {
@@ -410,6 +418,7 @@ static void __send_cap(struct ceph_mds_client *mdsc,
        send_cap_msg(mdsc, ceph_vino(inode).ino,
                     op, keep, wanted, seq, mseq,
                     size, max_size, &mtime, &atime, time_warp_seq,
+                    uid, gid, mode,
                     follows, session->s_mds);
 
        if (wake)
@@ -433,11 +442,6 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
        int mds;
        struct list_head *p;
        struct ceph_cap_snap *capsnap;
-       u64 follows;
-       int issued;
-       u64 size;
-       struct timespec mtime, atime, ctime;
-       u64 time_warp_seq;
        u32 mseq;
        struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
        struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
@@ -491,25 +495,23 @@ retry:
                        goto retry;
                }
 
-               follows = capsnap->follows;
-               size = capsnap->size;
-               atime = capsnap->atime;
-               mtime = capsnap->mtime;
-               ctime = capsnap->ctime;
-               time_warp_seq = capsnap->time_warp_seq;
-               issued = capsnap->issued;
+               atomic_inc(&capsnap->nref);
                spin_unlock(&inode->i_lock);
 
                dout(10, "flush_snaps %p cap_snap %p follows %lld size %llu\n",
-                    inode, capsnap, next_follows, size);
+                    inode, capsnap, next_follows, capsnap->size);
                send_cap_msg(mdsc, ceph_vino(inode).ino,
-                            CEPH_CAP_OP_FLUSHSNAP, issued, 0, 0, mseq,
-                            size, 0,
-                            &mtime, &atime, time_warp_seq,
-                            follows, mds);
+                            CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 0, mseq,
+                            capsnap->size, 0,
+                            &capsnap->mtime, &capsnap->atime,
+                            capsnap->time_warp_seq,
+                            capsnap->uid, capsnap->gid, capsnap->mode,
+                            capsnap->follows, mds);
+
+               next_follows = capsnap->follows + 1;
+               ceph_put_cap_snap(capsnap);
 
                spin_lock(&inode->i_lock);
-               next_follows = follows + 1;
                goto retry;
        }
 
@@ -1155,7 +1157,7 @@ static void handle_cap_flushedsnap(struct inode *inode,
                             capsnap, follows);
                        ceph_put_snap_context(capsnap->context);
                        list_del(&capsnap->ci_item);
-                       kfree(capsnap);
+                       ceph_put_cap_snap(capsnap);
                        drop = 1;
                        break;
                } else {
@@ -1365,7 +1367,7 @@ void ceph_handle_caps(struct ceph_mds_client *mdsc,
        if (!inode) {
                dout(10, " i don't have ino %llx, sending release\n", vino.ino);
                send_cap_msg(mdsc, vino.ino, CEPH_CAP_OP_RELEASE, 0, 0, seq,
-                            size, 0, 0, NULL, NULL, 0, 0, mds);
+                            size, 0, 0, NULL, NULL, 0, 0, 0, 0, 0, mds);
                goto no_inode;
        }
 
index 87afc6428368cfe817fc075c3696a671febf7506..8d8080f886c0ceb4065b550534031be8cec7074d 100644 (file)
@@ -310,6 +310,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
                derr(10, "ENOMEM allocating ceph_cap_snap on %p\n", inode);
                return;
        }
+       atomic_set(&capsnap->nref, 1);
 
        spin_lock(&inode->i_lock);
        used = __ceph_caps_used(ci);
@@ -326,6 +327,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
                capsnap->follows = snapc->seq - 1;
                capsnap->context = ceph_get_snap_context(snapc);
                capsnap->issued = __ceph_caps_issued(ci, NULL);
+
+               capsnap->mode = inode->i_mode;
+               capsnap->uid = inode->i_uid;
+               capsnap->gid = inode->i_gid;
+
+               /* fixme? */
+               capsnap->xattr_blob = 0;
+               capsnap->xattr_len = 0;
+
                /* dirty page count moved from _head to this cap_snap;
                   all subsequent writes page dirties occur _after_ this
                   snapshot. */
index 82b1efa8db652ac89155e2b0ff4d46ad43da62f6..91db5b7bae7361fd8991bfbb56e1030445be1dfe 100644 (file)
@@ -144,17 +144,33 @@ struct ceph_cap {
  * data before flushing the snapped state (tracked here) back to the MDS.
  */
 struct ceph_cap_snap {
+       atomic_t nref;
+
        struct list_head ci_item;
        u64 follows;
        int issued;
+       struct ceph_snap_context *context;
+       
+       mode_t mode;
+       uid_t uid;
+       gid_t gid;
+
+       void *xattr_blob;
+       int xattr_len;  
+
        u64 size;
        struct timespec mtime, atime, ctime;
        u64 time_warp_seq;
-       struct ceph_snap_context *context;
        int writing;   /* a sync write is still in progress */
        int dirty;     /* dirty pages awaiting writeback */
 };
 
+static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
+{
+       if (atomic_dec_and_test(&capsnap->nref))
+               kfree(capsnap);
+}
+
 /*
  * The frag tree describes how a directory is fragmented, potentially across
  * multiple metadata servers.  It is also used to indicate points where