]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: fix snap_rwsem vs s_mutex deadlock in ceph_inode_set_size
authorSage Weil <sage@newdream.net>
Thu, 4 Jun 2009 19:28:51 +0000 (12:28 -0700)
committerSage Weil <sage@newdream.net>
Thu, 4 Jun 2009 19:28:51 +0000 (12:28 -0700)
Call ceph_check_caps outside of the snap_rwsem.  Fixes this lockdep
warning:

[84919.839574] cp/2724 is trying to acquire lock:
[84919.839574]  (&s->s_mutex){--..}, at: [<ffffffffa00fa0f3>] ceph_check_caps+0x
968/0xc04 [ceph]
[84919.839574]
[84919.839574] but task is already holding lock:
[84919.839574]  (&mdsc->snap_rwsem){----}, at: [<ffffffffa00f3803>] ceph_write_b
egin+0x1d9/0x696 [ceph]
[84919.839574]
[84919.839574] which lock already depends on the new lock.
[84919.839574]
[84919.839574]
[84919.839574] the existing dependency chain (in reverse order) is:
[84919.839574]
[84919.839574] -> #1 (&mdsc->snap_rwsem){----}:
[84919.839574]        [<ffffffff8025e4bd>] validate_chain+0x9ef/0xce8
[84919.839574]        [<ffffffff8025eee6>] __lock_acquire+0x730/0x7b9
[84919.839574]        [<ffffffff8025eff4>] lock_acquire+0x85/0xa9
[84919.839574]        [<ffffffff8061577d>] down_write+0x43/0x77
[84919.839574]        [<ffffffffa010ab3c>] ceph_mdsc_handle_reply+0x960/0xa72 [c
eph]
[84919.839574]        [<ffffffffa00e7520>] ceph_dispatch+0x4eb/0x5f6 [ceph]
[84919.839574]        [<ffffffffa0103906>] try_read+0x15cf/0x17da [ceph]
[84919.839574]        [<ffffffffa0103da0>] con_work+0x28f/0x18a9 [ceph]
[84919.839574]        [<ffffffff8024cb7c>] run_workqueue+0xf5/0x209
[84919.839574]        [<ffffffff8024d88b>] worker_thread+0xdb/0xe8
[84919.839574]        [<ffffffff8025078c>] kthread+0x49/0x79
[84919.839574]        [<ffffffff8020d17a>] child_rip+0xa/0x20
[84919.839574]        [<ffffffffffffffff>] 0xffffffffffffffff
[84919.839574]
[84919.839574] -> #0 (&s->s_mutex){--..}:
[84919.839574]        [<ffffffff8025e14f>] validate_chain+0x681/0xce8
[84919.839574]        [<ffffffff8025eee6>] __lock_acquire+0x730/0x7b9
[84919.839574]        [<ffffffff8025eff4>] lock_acquire+0x85/0xa9
[84919.839574]        [<ffffffff80614fd2>] mutex_lock_nested+0x116/0x2f4
[84919.839574]        [<ffffffffa00fa0f3>] ceph_check_caps+0x968/0xc04 [ceph]
[84919.839574]        [<ffffffffa00e9c19>] ceph_inode_set_size+0xe1/0xf4 [ceph]
[84919.839574]        [<ffffffffa00f2e3c>] ceph_write_end+0x16f/0x1b4 [ceph]
[84919.839574]        [<ffffffff8027e33c>] generic_file_buffered_write+0x1a2/0x2
d4
[84919.839574]        [<ffffffff8027e958>] __generic_file_aio_write_nolock+0x354
/0x3be
[84919.839574]        [<ffffffff8027ebe4>] generic_file_aio_write+0x66/0xc2
[84919.839574]        [<ffffffffa00f1737>] ceph_aio_write+0x969/0xb52 [ceph]
[84919.839574]        [<ffffffff802aa3eb>] do_sync_write+0xe2/0x126
[84919.839574]        [<ffffffff802aabbc>] vfs_write+0xae/0x137
[84919.839574]        [<ffffffff802ab072>] sys_write+0x47/0x6f
[84919.839574]        [<ffffffff8020c0db>] system_call_fastpath+0x16/0x1b
[84919.839574]        [<ffffffffffffffff>] 0xffffffffffffffff
[84919.839574]
[84919.839574] other info that might help us debug this:
[84919.839574]
[84919.839574] 2 locks held by cp/2724:
[84919.839574]  #0:  (&sb->s_type->i_mutex_key#12){--..}, at: [<ffffffff8027ebcd
>] generic_file_aio_write+0x4f/0xc2
[84919.839574]  #1:  (&mdsc->snap_rwsem){----}, at: [<ffffffffa00f3803>] ceph_wr
ite_begin+0x1d9/0x696 [ceph]
[84919.839574]
[84919.839574] stack backtrace:
[84919.839574] Pid: 2724, comm: cp Not tainted 2.6.29 #16
[84919.839574] Call Trace:
[84919.839574]  [<ffffffff8025dac3>] print_circular_bug_tail+0xc7/0xd2
[84919.839574]  [<ffffffff8025e14f>] validate_chain+0x681/0xce8
[84919.839574]  [<ffffffff8028352a>] ? get_page_from_freelist+0x3c6/0x6b5
[84919.839574]  [<ffffffff8025eee6>] __lock_acquire+0x730/0x7b9
[84919.839574]  [<ffffffff8025eff4>] lock_acquire+0x85/0xa9
[84919.839574]  [<ffffffffa00fa0f3>] ? ceph_check_caps+0x968/0xc04 [ceph]
[84919.839574]  [<ffffffff80614fd2>] mutex_lock_nested+0x116/0x2f4
[84919.839574]  [<ffffffffa00fa0f3>] ? ceph_check_caps+0x968/0xc04 [ceph]
[84919.839574]  [<ffffffffa00fa0f3>] ? ceph_check_caps+0x968/0xc04 [ceph]
[84919.839574]  [<ffffffffa00fa0f3>] ceph_check_caps+0x968/0xc04 [ceph]
[84919.839574]  [<ffffffff8025a70e>] ? get_lock_stats+0x34/0x5e
[84919.839574]  [<ffffffffa00e9c19>] ceph_inode_set_size+0xe1/0xf4 [ceph]
[84919.839574]  [<ffffffffa00f2e3c>] ceph_write_end+0x16f/0x1b4 [ceph]
[84919.839574]  [<ffffffff8027e33c>] generic_file_buffered_write+0x1a2/0x2d4
[84919.839574]  [<ffffffff8027e958>] __generic_file_aio_write_nolock+0x354/0x3be
[84919.839574]  [<ffffffff8027ebe4>] generic_file_aio_write+0x66/0xc2
[84919.839574]  [<ffffffffa00f1737>] ceph_aio_write+0x969/0xb52 [ceph]
[84919.839574]  [<ffffffff802bd728>] ? touch_atime+0xee/0x110
[84919.839574]  [<ffffffff80329414>] ? nfs_file_read+0x107/0x116
[84919.839574]  [<ffffffff802aa3eb>] do_sync_write+0xe2/0x126
[84919.839574]  [<ffffffff8025ef04>] ? __lock_acquire+0x74e/0x7b9
[84919.839574]  [<ffffffff8025a746>] ? put_lock_stats+0xe/0x27
[84919.839574]  [<ffffffff802508a3>] ? autoremove_wake_function+0x0/0x38
[84919.839574]  [<ffffffff802d1d5b>] ? dnotify_parent+0x6c/0x74
[84919.839574]  [<ffffffff803d5f70>] ? security_file_permission+0x11/0x13
[84919.839574]  [<ffffffff802aabbc>] vfs_write+0xae/0x137
[84919.839574]  [<ffffffff802ab072>] sys_write+0x47/0x6f
[84919.839574]  [<ffffffff8020c0db>] system_call_fastpath+0x16/0x1b

src/kernel/addr.c
src/kernel/file.c
src/kernel/inode.c
src/kernel/super.h

index 326125f99465106398c7cf3896d64db7e3fef939..4ee5d75fceffca4b43020c30499f2e696afba170 100644 (file)
@@ -996,6 +996,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+       int check_cap = 0;
 
        dout(10, "write_end file %p inode %p page %p %d~%d (%d)\n", file,
             inode, page, (int)pos, (int)copied, (int)len);
@@ -1012,7 +1013,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
        /* did file size increase? */
        /* (no need for i_size_read(); we caller holds i_mutex */
        if (pos+copied > inode->i_size)
-               ceph_inode_set_size(inode, pos+copied);
+               check_cap = ceph_inode_set_size(inode, pos+copied);
 
        if (!PageUptodate(page))
                SetPageUptodate(page);
@@ -1023,6 +1024,9 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
        up_read(&mdsc->snap_rwsem);
        page_cache_release(page);
 
+       if (check_cap)
+               ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
+
        return copied;
 }
 
index 8d21e3a1031909c0c17fd3109961e14fd14f2065..46a708e549edb208b38edca1cbf5b91eacdfd3f9 100644 (file)
@@ -526,6 +526,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
        int written = 0;
        int flags;
        int do_sync = 0;
+       int check_caps = 0;
        int ret;
        struct timespec mtime = CURRENT_TIME;
 
@@ -632,7 +633,10 @@ out:
                ret = written;
                *offset = pos;
                if (pos > i_size_read(inode))
-                       ceph_inode_set_size(inode, pos);
+                       check_caps = ceph_inode_set_size(inode, pos);
+               if (check_caps)
+                       ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY,
+                                       NULL);
        }
        return ret;
 }
index 92f226fb8729fdb25ddf9fd2b04e6e63e4a170e7..1179f2ead4d875f96d6c0cb53a250ec3208b48e6 100644 (file)
@@ -1143,9 +1143,10 @@ out:
        return err;
 }
 
-void ceph_inode_set_size(struct inode *inode, loff_t size)
+int ceph_inode_set_size(struct inode *inode, loff_t size)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
+       int ret = 0;
 
        spin_lock(&inode->i_lock);
        dout(30, "set_size %p %llu -> %llu\n", inode, inode->i_size, size);
@@ -1154,12 +1155,11 @@ void ceph_inode_set_size(struct inode *inode, loff_t size)
 
        /* tell the MDS if we are approaching max_size */
        if ((size << 1) >= ci->i_max_size &&
-           (ci->i_reported_size << 1) < ci->i_max_size) {
-               spin_unlock(&inode->i_lock);
-               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
-       } else {
-               spin_unlock(&inode->i_lock);
-       }
+           (ci->i_reported_size << 1) < ci->i_max_size)
+               ret = 1;
+
+       spin_unlock(&inode->i_lock);
+       return ret;
 }
 
 /*
index 8b12b359b88f9c97f660f60814814fe481897fdd..9bd3a7970c2b55adadb03b9bd9595bdd4b0a0b72 100644 (file)
@@ -799,7 +799,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 
 extern int ceph_inode_holds_cap(struct inode *inode, int mask);
 
-extern void ceph_inode_set_size(struct inode *inode, loff_t size);
+extern int ceph_inode_set_size(struct inode *inode, loff_t size);
 extern void ceph_inode_writeback(struct work_struct *work);
 extern void ceph_vmtruncate_work(struct work_struct *work);
 extern void __ceph_do_pending_vmtruncate(struct inode *inode);