]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
[DO NOT MERGE] ceph: dump info about cap flushes when we're waiting too long for...
authorJeff Layton <jlayton@kernel.org>
Wed, 7 Jul 2021 13:20:34 +0000 (09:20 -0400)
committerIlya Dryomov <idryomov@gmail.com>
Tue, 13 Feb 2024 10:25:12 +0000 (11:25 +0100)
We've had some cases of hung umounts in teuthology testing. It looks
like the client is waiting for cap flushes to complete, but they never do.

Add a field to the inode to track the highest cap flush tid seen for
that inode. Also, add a backpointer from the ceph_cap_flush struct to
its owning inode.

Change wait_caps_flush to time out after 60s and then dump info about
the outstanding cap flushes on the list before resuming the wait.

Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
inode onto the floor, or if we get a message on an unregistered
session.

Reported-by: Patrick Donnelly <pdonnell@redhat.com>
URL: https://tracker.ceph.com/issues/51279
Signed-off-by: Jeff Layton <jlayton@kernel.org>
fs/ceph/caps.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/snap.c
fs/ceph/super.h

index 7fb4aae97412464c54b42037f75016085d214bd3..d26edff6dd755976f5b4a8858e8e931768d11c8d 100644 (file)
@@ -1945,6 +1945,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
        swap(cf, ci->i_prealloc_cap_flush);
        cf->caps = flushing;
        cf->wake = wake;
+       cf->ci = ci;
 
        spin_lock(&mdsc->cap_dirty_lock);
        list_del_init(&ci->i_dirty_item);
@@ -3819,6 +3820,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
        bool wake_ci = false;
        bool wake_mdsc = false;
 
+       /* track latest cap flush ack seen for this inode */
+       if (flush_tid > ci->i_last_cap_flush_ack)
+               ci->i_last_cap_flush_ack = flush_tid;
+
        list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
                /* Is this the one that was flushed? */
                if (cf->tid == flush_tid)
@@ -4450,7 +4455,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
              session->s_seq, (unsigned)seq);
 
        if (!inode) {
-               doutc(cl, " i don't have ino %llx\n", vino.ino);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info_client(cl, "can't find ino %llx:%llx for flush_ack!\n",
+                                      vino.snap, vino.ino);
+               else
+                       doutc(cl, " i don't have ino %llx\n", vino.ino);
 
                switch (op) {
                case CEPH_CAP_OP_IMPORT:
@@ -4505,10 +4514,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
        if (!cap) {
-               doutc(cl, " no cap on %p ino %llx.%llx from mds%d\n",
+               doutc(cl, " no cap on %p ino %llx:%llx from mds%d\n",
                      inode, ceph_ino(inode), ceph_snap(inode),
                      session->s_mds);
                spin_unlock(&ci->i_ceph_lock);
+               if (op == CEPH_CAP_OP_FLUSH_ACK)
+                       pr_info_client(cl, "no cap on %p ino %llx:%llx from mds%d for flush_ack!\n",
+                                      inode, ceph_ino(inode),
+                                      ceph_snap(inode), session->s_mds);
                switch (op) {
                case CEPH_CAP_OP_REVOKE:
                case CEPH_CAP_OP_GRANT:
index 7b2e77517f235ecd47264061c04bae2e6d0b7c83..23057c3fd31e92b3d9126195c5b6312707ee6337 100644 (file)
@@ -624,6 +624,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ci->i_cap_snaps);
        ci->i_head_snapc = NULL;
        ci->i_snap_caps = 0;
+       ci->i_last_cap_flush_ack = 0;
 
        ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
        for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
index 3ab9c268a8bb398b779cc93d3da98f3d13df8fe3..968918bcefddac9d0481a5061a145cd313000b81 100644 (file)
@@ -2291,6 +2291,26 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
        return ret;
 }
 
+static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+       struct ceph_client *cl = mdsc->fsc->client;
+       struct ceph_cap_flush *cf;
+
+       pr_info_client(cl, "still waiting for cap flushes through %llu:\n",
+                      want_tid);
+       spin_lock(&mdsc->cap_dirty_lock);
+       list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
+               if (cf->tid > want_tid)
+                       break;
+               pr_info_client(cl, "%llx:%llx %s %llu %llu %d%s\n",
+                              ceph_vinop(&cf->ci->netfs.inode),
+                              ceph_cap_string(cf->caps), cf->tid,
+                              cf->ci->i_last_cap_flush_ack, cf->wake,
+                              cf->is_capsnap ? " is_capsnap" : "");
+       }
+       spin_unlock(&mdsc->cap_dirty_lock);
+}
+
 /*
  * flush all dirty inode data to disk.
  *
@@ -2300,11 +2320,19 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc,
                            u64 want_flush_tid)
 {
        struct ceph_client *cl = mdsc->fsc->client;
+       int i = 0;
+       long ret;
 
        doutc(cl, "want %llu\n", want_flush_tid);
 
-       wait_event(mdsc->cap_flushing_wq,
-                  check_caps_flush(mdsc, want_flush_tid));
+       do {
+               ret = wait_event_timeout(mdsc->cap_flushing_wq,
+                          check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
+               if (ret == 0 && ++i < 5)
+                       dump_cap_flushes(mdsc, want_flush_tid);
+               else if (ret == 1)
+                       pr_info_client(cl, "condition evaluated to true after timeout!\n");
+       } while (ret == 0);
 
        doutc(cl, "ok, flushed thru %llu\n", want_flush_tid);
 }
@@ -5945,6 +5973,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 
        mutex_lock(&mdsc->mutex);
        if (__verify_registered_session(mdsc, s) < 0) {
+               pr_info_client(cl, "dropping tid %llu from unregistered session %d\n",
+                              msg->hdr.tid, s->s_mds);
                mutex_unlock(&mdsc->mutex);
                goto out;
        }
index c65f2b202b2b3e8bf9055f046a5d9a848c13b6b0..d17904221e44906a835d413f4c8294dd83d1f238 100644 (file)
@@ -595,6 +595,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci,
              ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush");
        ihold(inode);
 
+       capsnap->cap_flush.ci = ci;
        capsnap->follows = old_snapc->seq;
        capsnap->issued = __ceph_caps_issued(ci, NULL);
        capsnap->dirty = dirty;
index b63b4cd9b5b685a930bc33673f28e7c48a93d605..7eedf4d8a6db610d6a7d050f98795aaa38820795 100644 (file)
@@ -211,6 +211,7 @@ struct ceph_cap_flush {
        bool is_capsnap; /* true means capsnap */
        struct list_head g_list; // global
        struct list_head i_list; // per inode
+       struct ceph_inode_info *ci;
 };
 
 /*
@@ -416,6 +417,7 @@ struct ceph_inode_info {
        struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
                                                    dirty|flushing caps */
        unsigned i_snap_caps;           /* cap bits for snapped files */
+       u64 i_last_cap_flush_ack;               /* latest cap flush_ack tid for this inode */
 
        unsigned long i_last_rd;
        unsigned long i_last_wr;