From 9f0287ef619214c31031ecd2e13cf5e909b04bfc Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 1 May 2008 14:42:03 -0700 Subject: [PATCH] kclient: improved dirty page accounting (supports mmap writes) --- src/kernel/addr.c | 48 ++++++++++++++++++++++++++++++++++++++++------ src/kernel/inode.c | 25 +++++++++++++----------- src/kernel/super.c | 3 ++- src/kernel/super.h | 5 +++-- 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/src/kernel/addr.c b/src/kernel/addr.c index 1c00f164d20f1..d727a35817479 100644 --- a/src/kernel/addr.c +++ b/src/kernel/addr.c @@ -1,9 +1,11 @@ +#include #include #include #include #include /* generic_writepages */ #include +#include int ceph_debug_addr = -1; #define DOUT_VAR ceph_debug_addr @@ -349,7 +351,6 @@ get_more_pages: if (i < wrote) SetPageUptodate(page); else if (rc < 0) { - cleaned--; dout(20, "%p redirtying page %p\n", inode, page); redirty_page_for_writepage(wbc, page); @@ -493,11 +494,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) SetPageUptodate(page); - if (!PageDirty(page)) { - dout(20, "%p dirtying page %p\n", inode, page); - ceph_take_cap_refs(ceph_inode(inode), CEPH_CAP_WRBUFFER); - } else - dout(20, "%p page %p already dirty\n", inode, page); set_page_dirty(page); unlock_page(page); @@ -507,6 +503,45 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, } +static int ceph_set_page_dirty(struct page *page) /* ->set_page_dirty op installed in ceph_aops. Returns 1 when this call dirtied the page and 0 when it was already dirty. A page dirtied here while still attached to its mapping takes one i_wrbuffer_ref reference, replacing the reference ceph_write_end() used to take. */ +{ + struct address_space *mapping = page->mapping; + struct ceph_inode_info *ci; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); /* no mapping: only set the dirty flag, no accounting and no reference */ + + if (TestSetPageDirty(page)) { + dout(20, "set_page_dirty %p -- already dirty\n", page); + return 0; + } + + write_lock_irq(&mapping->tree_lock); + if (page->mapping) { /* Race with truncate? 
*/ + WARN_ON_ONCE(!PageUptodate(page)); + + if (mapping_cap_account_dirty(mapping)) { + __inc_zone_page_state(page, NR_FILE_DIRTY); + __inc_bdi_stat(mapping->backing_dev_info, + BDI_RECLAIMABLE); + task_io_account_write(PAGE_CACHE_SIZE); + } + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + + ci = ceph_inode(mapping->host); + atomic_inc(&ci->i_wrbuffer_ref); /* one reference per newly dirtied page; taken under tree_lock, not i_lock */ + dout(20, "set_page_dirty %p %p %d -> %d (?)\n", page, + &ci->vfs_inode, + atomic_read(&ci->i_wrbuffer_ref)-1, + atomic_read(&ci->i_wrbuffer_ref)); /* both values are re-read after the increment; presumably that is why the message carries "(?)" */ + } + write_unlock_irq(&mapping->tree_lock); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + + return 1; +} + const struct address_space_operations ceph_aops = { .readpage = ceph_readpage, @@ -515,4 +550,5 @@ const struct address_space_operations ceph_aops = { .writepages = ceph_writepages, .write_begin = ceph_write_begin, .write_end = ceph_write_end, + .set_page_dirty = ceph_set_page_dirty, }; diff --git a/src/kernel/inode.c b/src/kernel/inode.c index 3f945fd78e130..d2bd71123679b 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -1139,9 +1139,10 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) if (got & CEPH_CAP_WR) ci->i_wr_ref++; if (got & CEPH_CAP_WRBUFFER) { - ci->i_wrbuffer_ref++; - dout(30, "__take_cap_refs %p wrbuffer %d -> %d\n", - &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref); + atomic_inc(&ci->i_wrbuffer_ref); /* counter is now an atomic_t (see the super.h hunk of this patch) */ + dout(30, "__take_cap_refs %p wrbuffer %d -> %d (?)\n", + &ci->vfs_inode, atomic_read(&ci->i_wrbuffer_ref)-1, + atomic_read(&ci->i_wrbuffer_ref)); } } @@ -1196,10 +1197,11 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) if (--ci->i_wr_ref == 0) last++; if (had & CEPH_CAP_WRBUFFER) { - if (--ci->i_wrbuffer_ref == 0) + if (atomic_dec_and_test(&ci->i_wrbuffer_ref)) last++; - dout(30, "put_cap_refs %p wrbuffer %d -> %d\n", - &ci->vfs_inode, ci->i_wrbuffer_ref+1,ci->i_wrbuffer_ref); + dout(30, "put_cap_refs %p wrbuffer %d -> %d (?)\n", + &ci->vfs_inode, 
atomic_read(&ci->i_wrbuffer_ref)+1, + atomic_read(&ci->i_wrbuffer_ref)); } spin_unlock(&ci->vfs_inode.i_lock); @@ -1213,15 +1215,16 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr) { int last = 0; + int v; /* WRBUFFER references remaining after this put */ spin_lock(&ci->vfs_inode.i_lock); - ci->i_wrbuffer_ref -= nr; - last = ci->i_wrbuffer_ref; + v = atomic_sub_return(nr, &ci->i_wrbuffer_ref); /* drop nr references and get the exact resulting count in one atomic step */ + last = v; /* keep 'last' a remaining-reference count: atomic_sub_and_test() returns true at zero, which inverted the unchanged 'last == 0' checks below */ spin_unlock(&ci->vfs_inode.i_lock); - dout(30, "put_wrbuffer_cap_refs on %p %d -> %d%s\n", - &ci->vfs_inode, last+nr, last, last == 0 ? " LAST":""); - BUG_ON(ci->i_wrbuffer_ref < 0); + dout(30, "put_wrbuffer_cap_refs on %p %d -> %d (?)%s\n", + &ci->vfs_inode, v+nr, v, last == 0 ? " LAST":""); + BUG_ON(v < 0); if (last == 0) ceph_check_caps(ci, 0); diff --git a/src/kernel/super.c b/src/kernel/super.c index 0c57e6b19473a..d23dc53eb71df 100644 --- a/src/kernel/super.c +++ b/src/kernel/super.c @@ -172,7 +172,8 @@ static struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_requested_max_size = 0; ci->i_rd_ref = ci->i_rdcache_ref = 0; - ci->i_wr_ref = ci->i_wrbuffer_ref = 0; + ci->i_wr_ref = 0; + atomic_set(&ci->i_wrbuffer_ref, 0); ci->i_hold_caps_until = 0; INIT_LIST_HEAD(&ci->i_cap_delay_list); diff --git a/src/kernel/super.h b/src/kernel/super.h index 0278f76d0b494..0e77e0163e4aa 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -191,7 +191,8 @@ struct ceph_inode_info { struct timespec i_old_atime; /* held references to caps */ - int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wrbuffer_ref; + int i_rd_ref, i_rdcache_ref, i_wr_ref; + atomic_t i_wrbuffer_ref; unsigned long i_hashval; @@ -284,7 +285,7 @@ static inline int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_RDCACHE; if (ci->i_wr_ref) used |= CEPH_CAP_WR; - if (ci->i_wrbuffer_ref) + if (atomic_read(&ci->i_wrbuffer_ref)) used |= CEPH_CAP_WRBUFFER; return used; } -- 2.39.5