From: Jeff Layton Date: Wed, 7 Jul 2021 13:20:34 +0000 (-0400) Subject: [DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for... X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d727e939548d13edf9b202e34e1023a0a61577c5;p=ceph-client.git [DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for them We've had some cases of hung umounts in teuthology testing. It looks like client is waiting for cap flushes to complete, but they aren't. Add a field to the inode to track the highest cap flush tid seen for that inode. Also, add a backpointer to the inode to the ceph_cap_flush struct. Change wait_caps_flush to wait 60s, and then dump info about the condition of the list. Also, print pr_info messages if we end up dropping a FLUSH_ACK for an inode onto the floor, or if we get a message on an unregistered session. Reported-by: Patrick Donnelly URL: https://tracker.ceph.com/issues/51279 Signed-off-by: Jeff Layton --- diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 53cfe026b3ea5..511d1963aa094 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1827,6 +1827,7 @@ static u64 __mark_caps_flushing(struct inode *inode, swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; cf->wake = wake; + cf->ci = ci; spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); @@ -3647,6 +3648,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, bool wake_ci = false; bool wake_mdsc = false; + /* track latest cap flush ack seen for this inode */ + if (flush_tid > ci->i_last_cap_flush_ack) + ci->i_last_cap_flush_ack = flush_tid; + list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { /* Is this the one that was flushed? */ if (cf->tid == flush_tid) @@ -4201,7 +4206,11 @@ void ceph_handle_caps(struct ceph_mds_session *session, (unsigned)seq); if (!inode) { - dout(" i don't have ino %llx\n", vino.ino); + if (op == CEPH_CAP_OP_FLUSH_ACK) + pr_info("%s: can't find ino %llx:%llx for flush_ack!\n", + __func__, vino.snap, vino.ino); + else + dout(" i don't have ino %llx\n", vino.ino); if (op == CEPH_CAP_OP_IMPORT) { cap = ceph_get_cap(mdsc, NULL); @@ -4255,10 +4264,14 @@ void ceph_handle_caps(struct ceph_mds_session *session, spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds); if (!cap) { - dout(" no cap on %p ino %llx.%llx from mds%d\n", + dout(" no cap on %p ino %llx:%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), session->s_mds); spin_unlock(&ci->i_ceph_lock); + if (op == CEPH_CAP_OP_FLUSH_ACK) + pr_info("%s: no cap on %p ino %llx:%llx from mds%d for flush_ack!\n", + __func__, inode, ceph_ino(inode), + ceph_snap(inode), session->s_mds); goto flush_cap_releases; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 42351d7a0dd6b..6200d18345833 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -508,6 +508,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_cap_snaps); ci->i_head_snapc = NULL; ci->i_snap_caps = 0; + ci->i_last_cap_flush_ack = 0; ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ; for (i = 0; i < CEPH_FILE_MODE_BITS; i++) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 80f8b9ec1a312..9d1898c1a9254 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2067,6 +2067,25 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, return ret; } +static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid) +{ + struct ceph_cap_flush *cf; + + pr_info("%s: still waiting for cap flushes through %llu:\n", + __func__, want_tid); + spin_lock(&mdsc->cap_dirty_lock); + list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) { + if (cf->tid > want_tid) + break; + pr_info("%llx:%llx %s %llu %llu %d%s\n", + ceph_vinop(&cf->ci->netfs.inode), + ceph_cap_string(cf->caps), cf->tid, + cf->ci->i_last_cap_flush_ack, cf->wake, + cf->is_capsnap ? " is_capsnap" : ""); + } + spin_unlock(&mdsc->cap_dirty_lock); +} + /* * flush all dirty inode data to disk. * @@ -2075,10 +2094,20 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { + int i = 0; + long ret; + dout("check_caps_flush want %llu\n", want_flush_tid); - wait_event(mdsc->cap_flushing_wq, - check_caps_flush(mdsc, want_flush_tid)); + do { + ret = wait_event_timeout(mdsc->cap_flushing_wq, + check_caps_flush(mdsc, want_flush_tid), 60 * HZ); + if (ret == 0 && ++i < 5) + dump_cap_flushes(mdsc, want_flush_tid); + else if (ret == 1) + pr_info("%s: condition evaluated to true after timeout!\n", + __func__); + } while (ret == 0); dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); } @@ -5301,6 +5330,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) mutex_lock(&mdsc->mutex); if (__verify_registered_session(mdsc, s) < 0) { + pr_info("%s: dropping tid %llu from unregistered session %d\n", + __func__, msg->hdr.tid, s->s_mds); mutex_unlock(&mdsc->mutex); goto out; } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 864cdaa0d2bd6..9bceed2ebda39 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -585,6 +585,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); ihold(inode); + capsnap->cap_flush.ci = ci; capsnap->follows = old_snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = dirty; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 40630e6f691c7..bf5bb3f407404 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -194,6 +194,7 @@ struct ceph_cap_flush { bool is_capsnap; /* true means capsnap */ struct list_head g_list; // global struct list_head i_list; // per inode + struct ceph_inode_info *ci; }; /* @@ -399,6 +400,7 @@ struct ceph_inode_info { struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or dirty|flushing caps */ unsigned i_snap_caps; /* cap bits for snapped files */ + u64 i_last_cap_flush_ack; /* latest cap flush_ack tid for this inode */ unsigned long i_last_rd; unsigned long i_last_wr;