From 955351ff90172bdc41e8070dc10f980db9e07e85 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 17 Nov 2009 15:05:41 -0800 Subject: [PATCH] sepia: valgrind for a while --- src/TODO | 82 +++++++++++++++++++++++++++++++++++++++++++++ src/ceph.conf.sepia | 3 +- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/TODO b/src/TODO index d00674663addf..943186ea2d5ef 100644 --- a/src/TODO +++ b/src/TODO @@ -75,6 +75,88 @@ mds/CInode.cc:1233: FAILED assert(pi->rstat.rfiles >= 0) - mislinked directory? - premature filejournal trimming? - weird osd_lock contention during osd restart? +- kclient: +[85858.693538] BUG: sleeping function called from invalid context at kernel/mute +x.c:280 +[85858.701570] in_atomic(): 1, irqs_disabled(): 0, pid: 2762, name: cp +[85858.708027] 1 lock held by cp/2762: +[85858.711652] #0: (&dentry->d_lock){+.+...}, at: [] ceph_d_ +revalidate+0xae/0x41c [ceph] +[85858.721612] Pid: 2762, comm: cp Not tainted 2.6.32-rc2 #1 +[85858.727176] Call Trace: +[85858.729738] [] ? __debug_show_held_locks+0x22/0x24 +[85858.736309] [] __might_sleep+0x115/0x11a +[85858.742000] [] mutex_lock_nested+0x29/0x32a +[85858.747957] [] ? get_lock_stats+0x19/0x4c +[85858.753761] [] reset_connection+0x28/0xe4 [ceph] +[85858.760148] [] ceph_con_shutdown+0x2f/0x70 [ceph] +[85858.766630] [] ceph_put_mds_session+0x48/0x9a [ceph] +[85858.773378] [] __ceph_mdsc_drop_dentry_lease+0x18/0x23 [ceph] +[85858.780924] [] ceph_d_revalidate+0x17b/0x41c [ceph] +[85858.787569] [] ? __d_lookup+0x0/0x195 +[85858.793001] [] do_lookup+0x166/0x1bb +[85858.798362] [] __link_path_walk+0x38b/0xe8c +[85858.804319] [] path_walk+0x69/0xd4 +[85858.809476] [] do_filp_open+0x178/0x9dc +[85858.815088] [] ? put_lock_stats+0xe/0x27 +[85858.820771] [] ? _spin_unlock+0x30/0x4b +[85858.826373] [] ? alloc_fd+0x11d/0x12e +[85858.831811] [] do_sys_open+0x5d/0x10b +[85858.837241] [] sys_open+0x1b/0x1d +[85858.842334] [] tracesys+0xd0/0xd5 +- kclient: after reconnect, +cp: writing `/c/ceph2.2/bin/gs-gpl': Bad file descriptor + - need to somehow wake up unreconnected caps? hrm!! +- kclient: ~300 (306, 311) second delay before able to reconnect to restarted monitor??? +- kclient: socket creation +- kclient: bdi thing after mount failures, multiple attempts +[ 1438.509155] ------------[ cut here ]------------ +[ 1438.513933] WARNING: at fs/sysfs/dir.c:487 sysfs_add_one+0xf3/0x10a() +[ 1438.520560] Hardware name: PDSMi +[ 1438.523898] sysfs: cannot create duplicate filename '/class/bdi/0:25' +[ 1438.530526] Modules linked in: ceph fan ac battery container ehci_hcd uhci_hcd thermal button processor +[ 1438.546600] Pid: 2829, comm: mount.ceph Tainted: G W 2.6.32-rc2 #1 +[ 1438.553722] Call Trace: +[ 1438.556279] [] ? sysfs_add_one+0xf3/0x10a +[ 1438.562179] [] warn_slowpath_common+0x77/0xa4 +[ 1438.568399] [] warn_slowpath_fmt+0x64/0x66 +[ 1438.574364] [] ? trace_hardirqs_on_caller+0x113/0x13e +[ 1438.581312] [] ? sysfs_pathname+0x37/0x3f +[ 1438.587132] [] ? sysfs_pathname+0x37/0x3f +[ 1438.593017] [] ? sysfs_pathname+0x37/0x3f +[ 1438.598894] [] sysfs_add_one+0xf3/0x10a +[ 1438.604593] [] create_dir+0x58/0x93 +[ 1438.609929] [] sysfs_create_dir+0x38/0x4f +[ 1438.615825] [] ? _spin_unlock+0x30/0x4b +[ 1438.621520] [] kobject_add_internal+0x125/0x201 +[ 1438.627939] [] kobject_add_varg+0x41/0x4d +[ 1438.633820] [] kobject_add+0x89/0x8b +[ 1438.639263] [] ? mark_held_locks+0x4d/0x6b +[ 1438.645245] [] ? lockdep_init_map+0xae/0x540 +[ 1438.651351] [] ? kobject_get+0x1a/0x22 +[ 1438.656906] [] ? get_device+0x14/0x1a +[ 1438.662371] [] device_add+0x119/0x627 +[ 1438.667877] [] ? __spin_lock_init+0x31/0x54 +[ 1438.673933] [] device_register+0x19/0x1d +[ 1438.679703] [] device_create_vargs+0x10e/0x13b +[ 1438.686028] [] bdi_register+0x80/0x192 +[ 1438.691635] [] ? lockdep_init_map+0xae/0x540 +[ 1438.697762] [] ? mempool_kmalloc+0x11/0x13 +[ 1438.703714] [] ? mempool_create_node+0x122/0x16e +[ 1438.710218] [] ? ceph_set_super+0x0/0xd8 [ceph] +[ 1438.716620] [] ? mempool_kfree+0x0/0xb +[ 1438.722221] [] ? mempool_kmalloc+0x0/0x13 +[ 1438.728072] [] bdi_register_dev+0x23/0x25 +[ 1438.733944] [] ceph_get_sb+0xa20/0x104f [ceph] +[ 1438.740267] [] ? __kmalloc+0x15c/0x1ef +[ 1438.745869] [] ? __alloc_percpu+0xb/0xd +[ 1438.751545] [] vfs_kern_mount+0x9d/0x158 +[ 1438.757359] [] do_kern_mount+0x47/0xe7 +[ 1438.762967] [] do_mount+0x743/0x7a9 +[ 1438.768284] [] ? strndup_user+0x5d/0x85 +[ 1438.773962] [] sys_mount+0x7f/0xc1 +[ 1438.779204] [] ? trace_hardirqs_on_thunk+0x3a/0x3f +[ 1438.785846] [] system_call_fastpath+0x16/0x1b greg - osd: error handling diff --git a/src/ceph.conf.sepia b/src/ceph.conf.sepia index 76e4ea5599e03..e1bcae4ee150f 100644 --- a/src/ceph.conf.sepia +++ b/src/ceph.conf.sepia @@ -26,6 +26,7 @@ host = sepia1 mon addr = 10.3.14.128:6789 log dir = /data/sepia1 + valgrind = "--tool=memcheck --log-file=/data/sepia1/v.mon$id" ; --------------------- [mds] @@ -64,7 +65,7 @@ filestore max sync interval = 5 ; btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0" -; valgrind = "--tool=memcheck --log-file=/data/cosd$id/v.osd$id" + valgrind = "--tool=memcheck --log-file=/data/sepia$id/v.osd$id" [osd2] host = sepia2 -- 2.39.5