From: Sage Weil Date: Sat, 3 May 2008 02:59:39 +0000 (-0700) Subject: kclient: dirty page accounting hell X-Git-Tag: v0.2~30 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1ce13809ae8771dc64c92cc2bf49bf5196c7d51e;p=ceph.git kclient: dirty page accounting hell --- diff --git a/src/kernel/addr.c b/src/kernel/addr.c index d727a3581747..53c8295cd955 100644 --- a/src/kernel/addr.c +++ b/src/kernel/addr.c @@ -290,7 +290,7 @@ get_more_pages: set_page_writeback(page); cleaned++; - dout(20, "%p locked+cleaned page %p idx %lu\n", + derr(20, "%p locked+cleaned page %p idx %lu\n", inode, page, page->index); if (pages) @@ -351,7 +351,7 @@ get_more_pages: if (i < wrote) SetPageUptodate(page); else if (rc < 0) { - dout(20, "%p redirtying page %p\n", + derr(20, "%p redirtying page %p\n", inode, page); redirty_page_for_writepage(wbc, page); } @@ -512,7 +512,8 @@ static int ceph_set_page_dirty(struct page *page) return !TestSetPageDirty(page); if (TestSetPageDirty(page)) { - dout(20, "set_page_dirty %p -- already dirty\n", page); + dout(20, "%p set_page_dirty %p -- already dirty\n", + mapping->host, page); return 0; } @@ -531,8 +532,13 @@ static int ceph_set_page_dirty(struct page *page) ci = ceph_inode(mapping->host); atomic_inc(&ci->i_wrbuffer_ref); - dout(20, "set_page_dirty %p %p %d -> %d (?)\n", page, - &ci->vfs_inode, + /* + * set PagePrivate so that we get invalidatepage callback + * on truncate for proper dirty page accounting for mmap + */ + SetPagePrivate(page); + derr(20, "%p set_page_dirty %p %d -> %d (?)\n", + mapping->host, page, atomic_read(&ci->i_wrbuffer_ref)-1, atomic_read(&ci->i_wrbuffer_ref)); } @@ -542,6 +548,36 @@ static int ceph_set_page_dirty(struct page *page) return 1; } +void ceph_invalidatepage(struct page *page, unsigned long offset) +{ + struct ceph_inode_info *ci; + + ClearPagePrivate(page); + if (!PageDirty(page)) + return; + if (!page->mapping) + return; + ci = ceph_inode(page->mapping->host); + if (offset <= (page->index << PAGE_CACHE_SHIFT)) { + derr(20, "%p invalidatepage %p idx %lu full dirty page\n", + &ci->vfs_inode, page, page->index); + atomic_dec(&ci->i_wrbuffer_ref); + /* + * pretty sure this is fundamentally racy. help! + */ + ClearPageDirty(page); + } else + derr(20, "%p invalidatepage %p idx %lu partial dirty page\n", + &ci->vfs_inode, page, page->index); +} + +int ceph_releasepage(struct page *page, gfp_t g) +{ + struct inode *inode = page->mapping ? page->mapping->host:0; + dout(20, "%p releasepage %p\n", inode, page); + WARN_ON(PageDirty(page)); + return 0; +} const struct address_space_operations ceph_aops = { .readpage = ceph_readpage, @@ -551,4 +587,6 @@ const struct address_space_operations ceph_aops = { .write_begin = ceph_write_begin, .write_end = ceph_write_end, .set_page_dirty = ceph_set_page_dirty, + .invalidatepage = ceph_invalidatepage, + .releasepage = ceph_releasepage, }; diff --git a/src/kernel/inode.c b/src/kernel/inode.c index cd35a56c5eb1..723cfc734ea5 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -1061,6 +1061,7 @@ void ceph_inode_writeback(struct work_struct *work) write_inode_now(&ci->vfs_inode, 0); } + /* * called by setattr */ @@ -1075,6 +1076,8 @@ static int apply_truncate(struct inode *inode, loff_t size) ci->i_reported_size = size; spin_unlock(&inode->i_lock); } + if (atomic_read(&ci->i_wrbuffer_ref) == 0) + ceph_check_caps(ci, 0); return rc; } @@ -1106,6 +1109,8 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) if (to >= 0) { dout(10, "__do_pending_vmtruncate %p to %lld\n", inode, to); vmtruncate(inode, to); + if (atomic_read(&ci->i_wrbuffer_ref) == 0) + ceph_check_caps(ci, 0); } else dout(10, "__do_pending_vmtruncate %p nothing to do\n", inode); } @@ -1238,19 +1243,19 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr) { - int last = 0; + int was_last; int v; spin_lock(&ci->vfs_inode.i_lock); - last = atomic_sub_and_test(nr, &ci->i_wrbuffer_ref); + was_last = atomic_sub_and_test(nr, &ci->i_wrbuffer_ref); v = atomic_read(&ci->i_wrbuffer_ref); spin_unlock(&ci->vfs_inode.i_lock); dout(30, "put_wrbuffer_cap_refs on %p %d -> %d (?)%s\n", - &ci->vfs_inode, v+nr, v, last == 0 ? " LAST":""); - BUG_ON(v < 0); + &ci->vfs_inode, v+nr, v, was_last ? " LAST":""); + WARN_ON(v < 0); - if (last == 0) + if (was_last) ceph_check_caps(ci, 0); } diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index f42c2d20a5e7..e65f82bdebab 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -1566,7 +1566,8 @@ static void check_delayed_caps(struct ceph_mds_client *mdsc) } static void flush_write_caps(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) + struct ceph_mds_session *session, + int purge) { struct list_head *p, *n; @@ -1576,14 +1577,24 @@ static void flush_write_caps(struct ceph_mds_client *mdsc, struct inode *inode = &cap->ci->vfs_inode; int used, wanted; + /* invalidate any dirty remaining pages */ + __ceph_do_pending_vmtruncate(inode); + spin_lock(&inode->i_lock); if ((cap->implemented & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0) { spin_unlock(&inode->i_lock); continue; } + used = __ceph_caps_used(cap->ci); wanted = __ceph_caps_wanted(cap->ci); + if (purge && (used || wanted)) { + derr(0, "residual caps on %p used %d wanted %d %llu\n", + inode, used, wanted, inode->i_size); + used = wanted = 0; + } + __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 1); } } @@ -1600,7 +1611,7 @@ static int close_session(struct ceph_mds_client *mdsc, if (session->s_state >= CEPH_MDS_SESSION_CLOSING) goto done; - flush_write_caps(mdsc, session); + flush_write_caps(mdsc, session, 1); session->s_state = CEPH_MDS_SESSION_CLOSING; msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE,