From: Jeff Layton Date: Wed, 7 Jul 2021 13:20:34 +0000 (-0400) Subject: [DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for... X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e4a9e6b170f5b5d4f0e832526ecf5273c23df7a0;p=ceph-client.git [DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for them We've had some cases of hung umounts in teuthology testing. It looks like client is waiting for cap flushes to complete, but they aren't. Add a field to the inode to track the highest cap flush tid seen for that inode. Also, add a backpointer to the inode to the ceph_cap_flush struct. Change wait_caps_flush to wait 60s, and then dump info about the condition of the list. Also, print pr_info messages if we end up dropping a FLUSH_ACK for an inode onto the floor, or if we get a message on an unregistered session. Reported-by: Patrick Donnelly URL: https://tracker.ceph.com/issues/51279 Signed-off-by: Jeff Layton --- diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5c14ef04e4742..b8d0e7610e103 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1826,6 +1826,7 @@ static u64 __mark_caps_flushing(struct inode *inode, swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; cf->wake = wake; + cf->ci = ci; spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); @@ -3627,6 +3628,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, bool wake_ci = false; bool wake_mdsc = false; + /* track latest cap flush ack seen for this inode */ + if (flush_tid > ci->i_last_cap_flush_ack) + ci->i_last_cap_flush_ack = flush_tid; + list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { /* Is this the one that was flushed? */ if (cf->tid == flush_tid) @@ -4182,7 +4187,11 @@ void ceph_handle_caps(struct ceph_mds_session *session, (unsigned)seq); if (!inode) { - dout(" i don't have ino %llx\n", vino.ino); + if (op == CEPH_CAP_OP_FLUSH_ACK) + pr_info("%s: can't find ino %llx:%llx for flush_ack!\n", + __func__, vino.snap, vino.ino); + else + dout(" i don't have ino %llx\n", vino.ino); if (op == CEPH_CAP_OP_IMPORT) { cap = ceph_get_cap(mdsc, NULL); @@ -4236,10 +4245,14 @@ void ceph_handle_caps(struct ceph_mds_session *session, spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds); if (!cap) { - dout(" no cap on %p ino %llx.%llx from mds%d\n", + dout(" no cap on %p ino %llx:%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), session->s_mds); spin_unlock(&ci->i_ceph_lock); + if (op == CEPH_CAP_OP_FLUSH_ACK) + pr_info("%s: no cap on %p ino %llx:%llx from mds%d for flush_ack!\n", + __func__, inode, ceph_ino(inode), + ceph_snap(inode), session->s_mds); goto flush_cap_releases; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 63113e2a48907..3c7554d97093a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -508,6 +508,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_cap_snaps); ci->i_head_snapc = NULL; ci->i_snap_caps = 0; + ci->i_last_cap_flush_ack = 0; ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ; for (i = 0; i < CEPH_FILE_MODE_BITS; i++) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 00c3de177dd66..00e504bc096e7 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1991,6 +1991,25 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, return ret; } +static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid) +{ + struct ceph_cap_flush *cf; + + pr_info("%s: still waiting for cap flushes through %llu:\n", + __func__, want_tid); + spin_lock(&mdsc->cap_dirty_lock); + list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) { + if (cf->tid > want_tid) + break; + pr_info("%llx:%llx %s %llu %llu %d%s\n", + ceph_vinop(&cf->ci->vfs_inode), + ceph_cap_string(cf->caps), cf->tid, + cf->ci->i_last_cap_flush_ack, cf->wake, + cf->is_capsnap ? " is_capsnap" : ""); + } + spin_unlock(&mdsc->cap_dirty_lock); +} + /* * flush all dirty inode data to disk. * @@ -1999,10 +2018,20 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { + int i = 0; + long ret; + dout("check_caps_flush want %llu\n", want_flush_tid); - wait_event(mdsc->cap_flushing_wq, - check_caps_flush(mdsc, want_flush_tid)); + do { + ret = wait_event_timeout(mdsc->cap_flushing_wq, + check_caps_flush(mdsc, want_flush_tid), 60 * HZ); + if (ret == 0 && ++i < 5) + dump_cap_flushes(mdsc, want_flush_tid); + else if (ret == 1) + pr_info("%s: condition evaluated to true after timeout!\n", + __func__); + } while (ret == 0); dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); } @@ -5083,6 +5112,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) mutex_lock(&mdsc->mutex); if (__verify_registered_session(mdsc, s) < 0) { + pr_info("%s: dropping tid %llu from unregistered session %d\n", + __func__, msg->hdr.tid, s->s_mds); mutex_unlock(&mdsc->mutex); goto out; } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 322ee5add9426..db1433ce666e5 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -585,6 +585,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); ihold(inode); + capsnap->cap_flush.ci = ci; capsnap->follows = old_snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = dirty; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 20ceab74e8717..73db7f6021f3e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -188,6 +188,7 @@ struct ceph_cap_flush { bool is_capsnap; /* true means capsnap */ struct list_head g_list; // global struct list_head i_list; // per inode + struct ceph_inode_info *ci; }; /* @@ -392,6 +393,7 @@ struct ceph_inode_info { struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or dirty|flushing caps */ unsigned i_snap_caps; /* cap bits for snapped files */ + u64 i_last_cap_flush_ack; /* latest cap flush_ack tid for this inode */ unsigned long i_last_rd; unsigned long i_last_wr;