]> git.apps.os.sepia.ceph.com Git - ceph-client.git/commitdiff
[DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for...
authorJeff Layton <jlayton@kernel.org>
Wed, 7 Jul 2021 13:20:34 +0000 (09:20 -0400)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 25 Apr 2022 09:07:47 +0000 (11:07 +0200)
We've had some cases of hung umounts in teuthology testing. It looks
like client is waiting for cap flushes to complete, but they aren't.

Add a field to the inode to track the highest cap flush tid seen for
that inode. Also, add a backpointer to the inode to the ceph_cap_flush
struct.

Change wait_caps_flush to wait 60s, and then dump info about the
condition of the list.

Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
inode onto the floor, or if we get a message on an unregistered
session.

Reported-by: Patrick Donnelly <pdonnell@redhat.com>
URL: https://tracker.ceph.com/issues/51279
Signed-off-by: Jeff Layton <jlayton@kernel.org>
fs/ceph/caps.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/super.h

index 5c14ef04e474247e3d98fbdacfef017c5950d217..b8d0e7610e1033c929b8223308249da61994314c 100644 (file)
@@ -1826,6 +1826,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
        swap(cf, ci->i_prealloc_cap_flush);
        cf->caps = flushing;
        cf->wake = wake;
+       cf->ci = ci;
 
        spin_lock(&mdsc->cap_dirty_lock);
        list_del_init(&ci->i_dirty_item);
@@ -3627,6 +3628,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
        bool wake_ci = false;
        bool wake_mdsc = false;
 
+       /* track latest cap flush ack seen for this inode */
+       if (flush_tid > ci->i_last_cap_flush_ack)
+               ci->i_last_cap_flush_ack = flush_tid;
+
        list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
                /* Is this the one that was flushed? */
                if (cf->tid == flush_tid)
@@ -4182,7 +4187,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
             (unsigned)seq);
 
        if (!inode) {
-               dout(" i don't have ino %llx\n", vino.ino);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info("%s: can't find ino %llx:%llx for flush_ack!\n",
+                               __func__, vino.snap, vino.ino);
+               else
+                       dout(" i don't have ino %llx\n", vino.ino);
 
                if (op == CEPH_CAP_OP_IMPORT) {
                        cap = ceph_get_cap(mdsc, NULL);
@@ -4236,10 +4245,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
        if (!cap) {
-               dout(" no cap on %p ino %llx.%llx from mds%d\n",
+               dout(" no cap on %p ino %llx:%llx from mds%d\n",
                     inode, ceph_ino(inode), ceph_snap(inode),
                     session->s_mds);
                spin_unlock(&ci->i_ceph_lock);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info("%s: no cap on %p ino %llx:%llx from mds%d for flush_ack!\n",
+                               __func__, inode, ceph_ino(inode),
+                               ceph_snap(inode), session->s_mds);
                goto flush_cap_releases;
        }
 
index 63113e2a4890763e255048c31a0b228cdba13a22..3c7554d97093a4c2aa0234cc7e72cbaa96ca35a7 100644 (file)
@@ -508,6 +508,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ci->i_cap_snaps);
        ci->i_head_snapc = NULL;
        ci->i_snap_caps = 0;
+       ci->i_last_cap_flush_ack = 0;
 
        ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
        for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
index 00c3de177dd66f5eb90451558fcfbf9e7a67a240..459c6f23915f3a0472e5bf359c6090152a3947cb 100644 (file)
@@ -1991,6 +1991,24 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
        return ret;
 }
 
+static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+       struct ceph_cap_flush *cf;
+
+       pr_info("%s: still waiting for cap flushes through %llu:\n",
+               __func__, want_tid);
+       spin_lock(&mdsc->cap_dirty_lock);
+       list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
+               if (cf->tid > want_tid)
+                       break;
+               pr_info("%llx:%llx %s %llu %llu %d\n",
+                       ceph_vinop(&cf->ci->vfs_inode),
+                       ceph_cap_string(cf->caps), cf->tid,
+                       cf->ci->i_last_cap_flush_ack, cf->wake);
+       }
+       spin_unlock(&mdsc->cap_dirty_lock);
+}
+
 /*
  * flush all dirty inode data to disk.
  *
@@ -1999,10 +2017,20 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
 static void wait_caps_flush(struct ceph_mds_client *mdsc,
                            u64 want_flush_tid)
 {
+       int i = 0;
+       long ret;
+
        dout("check_caps_flush want %llu\n", want_flush_tid);
 
-       wait_event(mdsc->cap_flushing_wq,
-                  check_caps_flush(mdsc, want_flush_tid));
+       do {
+               ret = wait_event_timeout(mdsc->cap_flushing_wq,
+                          check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
+               if (ret == 0 && ++i < 5)
+                       dump_cap_flushes(mdsc, want_flush_tid);
+               else if (ret == 1)
+                       pr_info("%s: condition evaluated to true after timeout!\n",
+                                 __func__);
+       } while (ret == 0);
 
        dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
 }
@@ -5083,6 +5111,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 
        mutex_lock(&mdsc->mutex);
        if (__verify_registered_session(mdsc, s) < 0) {
+               pr_info("%s: dropping tid %llu from unregistered session %d\n",
+                       __func__, msg->hdr.tid, s->s_mds);
                mutex_unlock(&mdsc->mutex);
                goto out;
        }
index 20ceab74e87178ef03acccad052d812fa7d147ce..73db7f6021f3e2facaf377d7ea537559b36d5e61 100644 (file)
@@ -188,6 +188,7 @@ struct ceph_cap_flush {
        bool is_capsnap; /* true means capsnap */
        struct list_head g_list; // global
        struct list_head i_list; // per inode
+       struct ceph_inode_info *ci;
 };
 
 /*
@@ -392,6 +393,7 @@ struct ceph_inode_info {
        struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
                                                    dirty|flushing caps */
        unsigned i_snap_caps;           /* cap bits for snapped files */
+       u64 i_last_cap_flush_ack;               /* latest cap flush_ack tid for this inode */
 
        unsigned long i_last_rd;
        unsigned long i_last_wr;