git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: improved dirty page accounting (supports mmap writes)
author Sage Weil <sage@newdream.net>
Thu, 1 May 2008 21:42:03 +0000 (14:42 -0700)
committer Sage Weil <sage@newdream.net>
Thu, 1 May 2008 21:42:03 +0000 (14:42 -0700)
src/kernel/addr.c
src/kernel/inode.c
src/kernel/super.c
src/kernel/super.h

index 1c00f164d20f12b86d63ae966492f9016dee83be..d727a35817479d11fb3e02d62b00709283344f9a 100644 (file)
@@ -1,9 +1,11 @@
 
+#include <linux/backing-dev.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>   /* generic_writepages */
 #include <linux/pagevec.h>
+#include <linux/task_io_accounting_ops.h>
 
 int ceph_debug_addr = -1;
 #define DOUT_VAR ceph_debug_addr
@@ -349,7 +351,6 @@ get_more_pages:
                                if (i < wrote)
                                        SetPageUptodate(page);
                                else if (rc < 0) {
-                                       cleaned--;
                                        dout(20, "%p redirtying page %p\n", 
                                             inode, page);
                                        redirty_page_for_writepage(wbc, page);
@@ -493,11 +494,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
        if (!PageUptodate(page))
                SetPageUptodate(page);
 
-       if (!PageDirty(page)) {
-               dout(20, "%p dirtying page %p\n", inode, page);
-               ceph_take_cap_refs(ceph_inode(inode), CEPH_CAP_WRBUFFER);
-       } else
-               dout(20, "%p page %p already dirty\n", inode, page);
        set_page_dirty(page);
 
        unlock_page(page);
@@ -507,6 +503,45 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
 }
 
 
+static int ceph_set_page_dirty(struct page *page)
+{
+       struct address_space *mapping = page->mapping;
+       struct ceph_inode_info *ci;
+       /*
+        * Mark @page dirty and, when this call is the one that flips the
+        * dirty bit on a page that still has a mapping, bump the owning
+        * inode's i_wrbuffer_ref counter.
+        *
+        * Returns 1 if the page was newly dirtied by this call, 0 if it
+        * was already dirty.  A page with no mapping only gets its dirty
+        * bit set; no accounting is done for it.
+        */
+       if (unlikely(!mapping))
+               return !TestSetPageDirty(page);
+       /* Only the caller that actually sets the dirty bit goes further. */
+       if (TestSetPageDirty(page)) {
+               dout(20, "set_page_dirty %p -- already dirty\n", page);
+               return 0;
+       }
+       /* Accounting and radix-tree tagging happen under tree_lock. */
+       write_lock_irq(&mapping->tree_lock);
+       if (page->mapping) {    /* Race with truncate? */
+               WARN_ON_ONCE(!PageUptodate(page));
+               /* Dirty-page statistics, only if the mapping accounts them. */
+               if (mapping_cap_account_dirty(mapping)) {
+                       __inc_zone_page_state(page, NR_FILE_DIRTY);
+                       __inc_bdi_stat(mapping->backing_dev_info,
+                                       BDI_RECLAIMABLE);
+                       task_io_account_write(PAGE_CACHE_SIZE);
+               }
+               radix_tree_tag_set(&mapping->page_tree,
+                               page_index(page), PAGECACHE_TAG_DIRTY);
+               /*
+                * Count this dirty page in i_wrbuffer_ref.  The logged
+                * before/after values come from separate atomic_read()
+                * calls -- presumably why the message is tagged "(?)".
+                */
+               ci = ceph_inode(mapping->host);
+               atomic_inc(&ci->i_wrbuffer_ref);
+               dout(20, "set_page_dirty %p %p %d -> %d (?)\n", page,
+                    &ci->vfs_inode,
+                    atomic_read(&ci->i_wrbuffer_ref)-1,
+                    atomic_read(&ci->i_wrbuffer_ref));
+       }
+       write_unlock_irq(&mapping->tree_lock);
+       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+       /* The dirty bit was set by this call. */
+       return 1;
+}
+
 
 const struct address_space_operations ceph_aops = {
        .readpage = ceph_readpage,
@@ -515,4 +550,5 @@ const struct address_space_operations ceph_aops = {
        .writepages = ceph_writepages,
        .write_begin = ceph_write_begin,
        .write_end = ceph_write_end,
+       .set_page_dirty = ceph_set_page_dirty,
 };
index 3f945fd78e130f6e084c3277830e41eff610aa2f..d2bd71123679b088ae62408c37d5c2dd82f7d8fb 100644 (file)
@@ -1139,9 +1139,10 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
        if (got & CEPH_CAP_WR)
                ci->i_wr_ref++;
        if (got & CEPH_CAP_WRBUFFER) {
-               ci->i_wrbuffer_ref++;
-               dout(30, "__take_cap_refs %p wrbuffer %d -> %d\n",
-                    &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref);
+               atomic_inc(&ci->i_wrbuffer_ref);
+               dout(30, "__take_cap_refs %p wrbuffer %d -> %d (?)\n",
+                    &ci->vfs_inode, atomic_read(&ci->i_wrbuffer_ref)-1,
+                    atomic_read(&ci->i_wrbuffer_ref));
        }
 }
 
@@ -1196,10 +1197,11 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
                if (--ci->i_wr_ref == 0)
                        last++;
        if (had & CEPH_CAP_WRBUFFER) {
-               if (--ci->i_wrbuffer_ref == 0)
+               if (atomic_dec_and_test(&ci->i_wrbuffer_ref))
                        last++;
-               dout(30, "put_cap_refs %p wrbuffer %d -> %d\n",
-                    &ci->vfs_inode, ci->i_wrbuffer_ref+1,ci->i_wrbuffer_ref);
+               dout(30, "put_cap_refs %p wrbuffer %d -> %d (?)\n",
+                    &ci->vfs_inode, atomic_read(&ci->i_wrbuffer_ref)+1,
+                    atomic_read(&ci->i_wrbuffer_ref));
        }
        spin_unlock(&ci->vfs_inode.i_lock);
 
@@ -1213,15 +1215,16 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr)
 {
        int last = 0;
+       int v;
 
        spin_lock(&ci->vfs_inode.i_lock);
-       ci->i_wrbuffer_ref -= nr;
-       last = ci->i_wrbuffer_ref;
+       last = atomic_sub_and_test(nr, &ci->i_wrbuffer_ref);
+       v = atomic_read(&ci->i_wrbuffer_ref);
        spin_unlock(&ci->vfs_inode.i_lock);
 
-       dout(30, "put_wrbuffer_cap_refs on %p %d -> %d%s\n",
-            &ci->vfs_inode, last+nr, last, last == 0 ? " LAST":"");
-       BUG_ON(ci->i_wrbuffer_ref < 0);
+       dout(30, "put_wrbuffer_cap_refs on %p %d -> %d (?)%s\n",
+            &ci->vfs_inode, v+nr, v, last == 0 ? " LAST":"");
+       BUG_ON(v < 0);
 
        if (last == 0)
                ceph_check_caps(ci, 0);
index 0c57e6b19473aa9dc926d93c3fe16b1a75c92419..d23dc53eb71df9a48c2177f06ea8f6970d0f61ea 100644 (file)
@@ -172,7 +172,8 @@ static struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_requested_max_size = 0;
 
        ci->i_rd_ref = ci->i_rdcache_ref = 0;
-       ci->i_wr_ref = ci->i_wrbuffer_ref = 0;
+       ci->i_wr_ref = 0;
+       atomic_set(&ci->i_wrbuffer_ref, 0);
        ci->i_hold_caps_until = 0;
        INIT_LIST_HEAD(&ci->i_cap_delay_list);
 
index 0278f76d0b494129e3b4afa9fa28091803cefdb9..0e77e0163e4aa7a554e3ce74532d797435fec583 100644 (file)
@@ -191,7 +191,8 @@ struct ceph_inode_info {
        struct timespec i_old_atime;
 
        /* held references to caps */
-       int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wrbuffer_ref;
+       int i_rd_ref, i_rdcache_ref, i_wr_ref;
+       atomic_t i_wrbuffer_ref;
 
        unsigned long i_hashval;
 
@@ -284,7 +285,7 @@ static inline int __ceph_caps_used(struct ceph_inode_info *ci)
                used |= CEPH_CAP_RDCACHE;
        if (ci->i_wr_ref)
                used |= CEPH_CAP_WR;
-       if (ci->i_wrbuffer_ref)
+       if (atomic_read(&ci->i_wrbuffer_ref))
                used |= CEPH_CAP_WRBUFFER;
        return used;
 }