]> git.apps.os.sepia.ceph.com Git - ceph-client.git/commitdiff
[DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for...
authorJeff Layton <jlayton@kernel.org>
Wed, 7 Jul 2021 13:20:34 +0000 (09:20 -0400)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 15 Aug 2022 12:51:40 +0000 (14:51 +0200)
We've had some cases of hung umounts in teuthology testing. It looks
like client is waiting for cap flushes to complete, but they aren't.

Add a field to the inode to track the highest cap flush tid seen for
that inode. Also, add a backpointer to the inode to the ceph_cap_flush
struct.

Change wait_caps_flush to wait 60s, and then dump info about the
condition of the list.

Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
inode onto the floor, or if we get a message on an unregistered
session.

Reported-by: Patrick Donnelly <pdonnell@redhat.com>
URL: https://tracker.ceph.com/issues/51279
Signed-off-by: Jeff Layton <jlayton@kernel.org>
fs/ceph/caps.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/snap.c
fs/ceph/super.h

index 53cfe026b3ea5353172c354279d9772e449fbe01..511d1963aa0942a8ab4a3362e582d63b013029e8 100644 (file)
@@ -1827,6 +1827,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
        swap(cf, ci->i_prealloc_cap_flush);
        cf->caps = flushing;
        cf->wake = wake;
+       cf->ci = ci;
 
        spin_lock(&mdsc->cap_dirty_lock);
        list_del_init(&ci->i_dirty_item);
@@ -3647,6 +3648,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
        bool wake_ci = false;
        bool wake_mdsc = false;
 
+       /* track latest cap flush ack seen for this inode */
+       if (flush_tid > ci->i_last_cap_flush_ack)
+               ci->i_last_cap_flush_ack = flush_tid;
+
        list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
                /* Is this the one that was flushed? */
                if (cf->tid == flush_tid)
@@ -4201,7 +4206,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
             (unsigned)seq);
 
        if (!inode) {
-               dout(" i don't have ino %llx\n", vino.ino);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info("%s: can't find ino %llx:%llx for flush_ack!\n",
+                               __func__, vino.snap, vino.ino);
+               else
+                       dout(" i don't have ino %llx\n", vino.ino);
 
                if (op == CEPH_CAP_OP_IMPORT) {
                        cap = ceph_get_cap(mdsc, NULL);
@@ -4255,10 +4264,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
        if (!cap) {
-               dout(" no cap on %p ino %llx.%llx from mds%d\n",
+               dout(" no cap on %p ino %llx:%llx from mds%d\n",
                     inode, ceph_ino(inode), ceph_snap(inode),
                     session->s_mds);
                spin_unlock(&ci->i_ceph_lock);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info("%s: no cap on %p ino %llx:%llx from mds%d for flush_ack!\n",
+                               __func__, inode, ceph_ino(inode),
+                               ceph_snap(inode), session->s_mds);
                goto flush_cap_releases;
        }
 
index 42351d7a0dd6b74b3a246ad472db6f35c6742c11..6200d18345833358aa3e333ff750e6d105b2ea23 100644 (file)
@@ -508,6 +508,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ci->i_cap_snaps);
        ci->i_head_snapc = NULL;
        ci->i_snap_caps = 0;
+       ci->i_last_cap_flush_ack = 0;
 
        ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
        for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
index 80f8b9ec1a312d839111370a0ad5e1e15bdd5f27..9d1898c1a9254340c1cd8fe41f7116d3813ab211 100644 (file)
@@ -2067,6 +2067,25 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
        return ret;
 }
 
+static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+       struct ceph_cap_flush *cf;
+
+       pr_info("%s: still waiting for cap flushes through %llu:\n",
+               __func__, want_tid);
+       spin_lock(&mdsc->cap_dirty_lock);
+       list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
+               if (cf->tid > want_tid)
+                       break;
+               pr_info("%llx:%llx %s %llu %llu %d%s\n",
+                       ceph_vinop(&cf->ci->netfs.inode),
+                       ceph_cap_string(cf->caps), cf->tid,
+                       cf->ci->i_last_cap_flush_ack, cf->wake,
+                       cf->is_capsnap ? " is_capsnap" : "");
+       }
+       spin_unlock(&mdsc->cap_dirty_lock);
+}
+
 /*
  * flush all dirty inode data to disk.
  *
@@ -2075,10 +2094,20 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
 static void wait_caps_flush(struct ceph_mds_client *mdsc,
                            u64 want_flush_tid)
 {
+       int i = 0;
+       long ret;
+
        dout("check_caps_flush want %llu\n", want_flush_tid);
 
-       wait_event(mdsc->cap_flushing_wq,
-                  check_caps_flush(mdsc, want_flush_tid));
+       do {
+               ret = wait_event_timeout(mdsc->cap_flushing_wq,
+                          check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
+               if (ret == 0 && ++i < 5)
+                       dump_cap_flushes(mdsc, want_flush_tid);
+               else if (ret == 1)
+                       pr_info("%s: condition evaluated to true after timeout!\n",
+                                 __func__);
+       } while (ret == 0);
 
        dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
 }
@@ -5301,6 +5330,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 
        mutex_lock(&mdsc->mutex);
        if (__verify_registered_session(mdsc, s) < 0) {
+               pr_info("%s: dropping tid %llu from unregistered session %d\n",
+                       __func__, msg->hdr.tid, s->s_mds);
                mutex_unlock(&mdsc->mutex);
                goto out;
        }
index 864cdaa0d2bd659e43459ddfdecf6f5fd8bcc7b0..9bceed2ebda39d67b82e2836b3caf6f7fb6cef5f 100644 (file)
@@ -585,6 +585,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci,
             ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush");
        ihold(inode);
 
+       capsnap->cap_flush.ci = ci;
        capsnap->follows = old_snapc->seq;
        capsnap->issued = __ceph_caps_issued(ci, NULL);
        capsnap->dirty = dirty;
index 40630e6f691c787b1be9f52d3f7831212f677880..bf5bb3f4074042f3d56f712cd1b0000974681460 100644 (file)
@@ -194,6 +194,7 @@ struct ceph_cap_flush {
        bool is_capsnap; /* true means capsnap */
        struct list_head g_list; // global
        struct list_head i_list; // per inode
+       struct ceph_inode_info *ci;
 };
 
 /*
@@ -399,6 +400,7 @@ struct ceph_inode_info {
        struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
                                                    dirty|flushing caps */
        unsigned i_snap_caps;           /* cap bits for snapped files */
+       u64 i_last_cap_flush_ack;               /* latest cap flush_ack tid for this inode */
 
        unsigned long i_last_rd;
        unsigned long i_last_wr;