]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
[DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for...
authorJeff Layton <jlayton@kernel.org>
Wed, 7 Jul 2021 13:20:34 +0000 (09:20 -0400)
committerIlya Dryomov <idryomov@gmail.com>
Fri, 6 Jun 2025 09:12:28 +0000 (11:12 +0200)
We've had some cases of hung umounts in teuthology testing. It looks
like client is waiting for cap flushes to complete, but they aren't.

Add a field to the inode to track the highest cap flush tid seen for
that inode. Also, add a backpointer to the inode to the ceph_cap_flush
struct.

Change wait_caps_flush to wait 60s, and then dump info about the
condition of the list.

Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
inode onto the floor, or if we get a message on an unregistered
session.

Reported-by: Patrick Donnelly <pdonnell@redhat.com>
URL: https://tracker.ceph.com/issues/51279
Signed-off-by: Jeff Layton <jlayton@kernel.org>
fs/ceph/caps.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/snap.c
fs/ceph/super.h

index a8d8b56cf9d2113cfaeec9ae681cc486b68d1afa..5e7c9bd4dfa2c023099056c9aedaad4ef05eafef 100644 (file)
@@ -1932,6 +1932,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
        swap(cf, ci->i_prealloc_cap_flush);
        cf->caps = flushing;
        cf->wake = wake;
+       cf->ci = ci;
 
        spin_lock(&mdsc->cap_dirty_lock);
        list_del_init(&ci->i_dirty_item);
@@ -3825,6 +3826,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
        bool wake_ci = false;
        bool wake_mdsc = false;
 
+       /* track latest cap flush ack seen for this inode */
+       if (flush_tid > ci->i_last_cap_flush_ack)
+               ci->i_last_cap_flush_ack = flush_tid;
+
        list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
                /* Is this the one that was flushed? */
                if (cf->tid == flush_tid)
@@ -3984,8 +3989,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
        list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
                if (iter->follows == follows) {
                        if (iter->cap_flush.tid != flush_tid) {
-                               doutc(cl, " cap_snap %p follows %lld "
-                                     "tid %lld != %lld\n", iter,
+                               pr_info_client(cl, " %llx.%llx cap_snap %p follows %lld "
+                                     "tid %lld != %lld\n", ceph_vinop(inode), iter,
                                      follows, flush_tid,
                                      iter->cap_flush.tid);
                                break;
@@ -3993,8 +3998,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
                        capsnap = iter;
                        break;
                } else {
-                       doutc(cl, " skipping cap_snap %p follows %lld\n",
-                             iter, iter->follows);
+                       pr_info_client(cl, " %llx.%llx skipping cap_snap %p follows %lld, "
+                             "iter->follows %lld tid %lld iter->cap_flush.tid %lld\n",
+                             ceph_vinop(inode), iter, follows, iter->follows, flush_tid,
+                             iter->cap_flush.tid);
                }
        }
        if (capsnap)
@@ -4455,7 +4462,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        mutex_lock(&session->s_mutex);
 
        if (!inode) {
-               doutc(cl, " i don't have ino %llx\n", vino.ino);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info_client(cl, "can't find ino %llx:%llx for flush_ack!\n",
+                                      vino.snap, vino.ino);
+               else
+                       doutc(cl, " i don't have ino %llx\n", vino.ino);
 
                switch (op) {
                case CEPH_CAP_OP_IMPORT:
@@ -4510,10 +4521,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
        if (!cap) {
-               doutc(cl, " no cap on %p ino %llx.%llx from mds%d\n",
+               doutc(cl, " no cap on %p ino %llx:%llx from mds%d\n",
                      inode, ceph_ino(inode), ceph_snap(inode),
                      session->s_mds);
                spin_unlock(&ci->i_ceph_lock);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info_client(cl, "no cap on %p ino %llx:%llx from mds%d for flush_ack!\n",
+                                      inode, ceph_ino(inode),
+                                      ceph_snap(inode), session->s_mds);
                switch (op) {
                case CEPH_CAP_OP_REVOKE:
                case CEPH_CAP_OP_GRANT:
index 06cd2963e41ee0e65e0b7920287130e04f4fb51f..c46fa6ed11c19c5a24ba4bceb8ae00649458c4cd 100644 (file)
@@ -624,6 +624,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ci->i_cap_snaps);
        ci->i_head_snapc = NULL;
        ci->i_snap_caps = 0;
+       ci->i_last_cap_flush_ack = 0;
 
        ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
        for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
index 230e0c3f341f7119db7a189a5a378772cc4250cd..542fa0c15c51cf5c70d238af013e7392fed4a8f4 100644 (file)
@@ -2283,6 +2283,26 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
        return ret;
 }
 
+static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+       struct ceph_client *cl = mdsc->fsc->client;
+       struct ceph_cap_flush *cf;
+
+       pr_info_client(cl, "still waiting for cap flushes through %llu:\n",
+                      want_tid);
+       spin_lock(&mdsc->cap_dirty_lock);
+       list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
+               if (cf->tid > want_tid)
+                       break;
+               pr_info_client(cl, "%llx:%llx %s %llu %llu %d%s\n",
+                              ceph_vinop(&cf->ci->netfs.inode),
+                              ceph_cap_string(cf->caps), cf->tid,
+                              cf->ci->i_last_cap_flush_ack, cf->wake,
+                              cf->is_capsnap ? " is_capsnap" : "");
+       }
+       spin_unlock(&mdsc->cap_dirty_lock);
+}
+
 /*
  * flush all dirty inode data to disk.
  *
@@ -2292,11 +2312,19 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc,
                            u64 want_flush_tid)
 {
        struct ceph_client *cl = mdsc->fsc->client;
+       int i = 0;
+       long ret;
 
        doutc(cl, "want %llu\n", want_flush_tid);
 
-       wait_event(mdsc->cap_flushing_wq,
-                  check_caps_flush(mdsc, want_flush_tid));
+       do {
+               ret = wait_event_timeout(mdsc->cap_flushing_wq,
+                          check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
+               if (ret == 0 && ++i < 5)
+                       dump_cap_flushes(mdsc, want_flush_tid);
+               else if (ret == 1)
+                       pr_info_client(cl, "condition evaluated to true after timeout!\n");
+       } while (ret == 0);
 
        doutc(cl, "ok, flushed thru %llu\n", want_flush_tid);
 }
@@ -6228,6 +6256,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 
        mutex_lock(&mdsc->mutex);
        if (__verify_registered_session(mdsc, s) < 0) {
+               pr_info_client(cl, "dropping tid %llu from unregistered session %d\n",
+                              msg->hdr.tid, s->s_mds);
                mutex_unlock(&mdsc->mutex);
                goto out;
        }
index c65f2b202b2b3e8bf9055f046a5d9a848c13b6b0..d17904221e44906a835d413f4c8294dd83d1f238 100644 (file)
@@ -595,6 +595,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci,
              ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush");
        ihold(inode);
 
+       capsnap->cap_flush.ci = ci;
        capsnap->follows = old_snapc->seq;
        capsnap->issued = __ceph_caps_issued(ci, NULL);
        capsnap->dirty = dirty;
index bb0db0cc8003947c5ff8595a5aba3c74d8c0632d..4a54fc273ec1d51d14747f08dfce610aad66b0a3 100644 (file)
@@ -212,6 +212,7 @@ struct ceph_cap_flush {
        bool is_capsnap; /* true means capsnap */
        struct list_head g_list; // global
        struct list_head i_list; // per inode
+       struct ceph_inode_info *ci;
 };
 
 /*
@@ -417,6 +418,7 @@ struct ceph_inode_info {
        struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
                                                    dirty|flushing caps */
        unsigned i_snap_caps;           /* cap bits for snapped files */
+       u64 i_last_cap_flush_ack;               /* latest cap flush_ack tid for this inode */
 
        unsigned long i_last_rd;
        unsigned long i_last_wr;