git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: less noisy about mds session timeouts
author: Sage Weil <sage@newdream.net>
Mon, 3 Aug 2009 16:37:50 +0000 (09:37 -0700)
committer: Sage Weil <sage@newdream.net>
Mon, 3 Aug 2009 16:37:50 +0000 (09:37 -0700)
In particular, we don't want to print the "session probably timed out"
message multiple times.  Use a new HUNG session state to note when a
session appears to be hung, and pr_info only on transitions into and
out of that state.

src/TODO
src/kernel/mds_client.c
src/kernel/mds_client.h

index fe06a53e6c8ba36071bca846bdedd0e8e54ccb14..3dbf931e659936700b86286ff909c8e5d47cdbc1 100644 (file)
--- a/src/TODO
+++ b/src/TODO
@@ -108,12 +108,12 @@ repair
 - mds scrubbing
 
 kclient
+- async writepage
 ?- ensure cap_snaps reflush after client reconnect 
 - fix up mds selection, and ESTALE handling
 - make cap import/export efficient
 - simplify mds auth tracking?
   - use caps instead?
-- unwind writeback start error in addr.c (see fixme)... by redirtying pages?
 - flock, fnctl locks
 - ACLs
 - should we try to ref CAP_PIN on special inodes that are open?  
index 7417d3e80825a99061ec956b9c2ffeb7d82aa87e..13081266107987f5d66d40949f2ebe390bf24069 100644 (file)
--- a/src/kernel/mds_client.c
+++ b/src/kernel/mds_client.c
@@ -261,6 +261,7 @@ static const char *session_state_name(int s)
        case CEPH_MDS_SESSION_NEW: return "new";
        case CEPH_MDS_SESSION_OPENING: return "opening";
        case CEPH_MDS_SESSION_OPEN: return "open";
+       case CEPH_MDS_SESSION_HUNG: return "hung";
        case CEPH_MDS_SESSION_CLOSING: return "closing";
        case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
        default: return "???";
@@ -1449,7 +1450,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
                session = register_session(mdsc, mds);
        dout("do_request mds%d session %p state %s\n", mds, session,
             session_state_name(session->s_state));
-       if (session->s_state != CEPH_MDS_SESSION_OPEN) {
+       if (session->s_state != CEPH_MDS_SESSION_OPEN &&
+           session->s_state != CEPH_MDS_SESSION_HUNG) {
                if (session->s_state == CEPH_MDS_SESSION_NEW ||
                    session->s_state == CEPH_MDS_SESSION_CLOSING)
                        __open_session(mdsc, session);
@@ -1771,7 +1773,7 @@ void ceph_mdsc_handle_forward(struct ceph_mds_client *mdsc,
        int err = -EINVAL;
        void *p = msg->front.iov_base;
        void *end = p + msg->front.iov_len;
-       int from_mds;
+       int from_mds, state;
 
        if (le32_to_cpu(msg->hdr.src.name.type) != CEPH_ENTITY_TYPE_MDS)
                goto bad;
@@ -1790,12 +1792,14 @@ void ceph_mdsc_handle_forward(struct ceph_mds_client *mdsc,
                goto out;  /* dup reply? */
        }
 
+       state = mdsc->sessions[next_mds]->s_state;
        if (fwd_seq <= req->r_num_fwd) {
                dout("forward %llu to mds%d - old seq %d <= %d\n",
                     tid, next_mds, req->r_num_fwd, fwd_seq);
        } else if (!must_resend &&
                   __have_session(mdsc, next_mds) &&
-                  mdsc->sessions[next_mds]->s_state == CEPH_MDS_SESSION_OPEN) {
+                  (state == CEPH_MDS_SESSION_OPEN ||
+                   state == CEPH_MDS_SESSION_HUNG)) {
                /* yes.  adjust our sessions, but that's all; the old mds
                 * forwarded our message for us. */
                dout("forward %llu to mds%d (mds%d fwded)\n", tid, next_mds,
@@ -1866,6 +1870,12 @@ void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc,
        dout("handle_session mds%d %s %p state %s seq %llu\n",
             mds, ceph_session_op_name(op), session,
             session_state_name(session->s_state), seq);
+
+       if (session->s_state == CEPH_MDS_SESSION_HUNG) {
+               session->s_state = CEPH_MDS_SESSION_OPEN;
+               pr_info("ceph mds%d session came back\n", session->s_mds);
+       }
+
        switch (op) {
        case CEPH_SESSION_OPEN:
                session->s_state = CEPH_MDS_SESSION_OPEN;
@@ -2531,8 +2541,11 @@ static void delayed_work(struct work_struct *work)
                        continue;
                }
                if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
-                       pr_info("ceph mds%d session probably timed out, "
-                               "requesting mds map\n", s->s_mds);
+                       if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+                               s->s_state = CEPH_MDS_SESSION_HUNG;
+                               pr_info("ceph mds%d session probably timed out,"
+                                       " requesting mds map\n", s->s_mds);
+                       }
                        want_map = mdsc->mdsmap->m_epoch + 1;
                }
                if (s->s_state < CEPH_MDS_SESSION_OPEN) {
index 2361f461083d302ae8a267ee36d119a43c545bf1..bf6fb9984c6efbf1000ce0821b52c3fdfa789607 100644 (file)
--- a/src/kernel/mds_client.h
+++ b/src/kernel/mds_client.h
@@ -84,6 +84,7 @@ enum {
        CEPH_MDS_SESSION_NEW = 1,
        CEPH_MDS_SESSION_OPENING = 2,
        CEPH_MDS_SESSION_OPEN = 3,
+       CEPH_MDS_SESSION_HUNG = 4,
        CEPH_MDS_SESSION_CLOSING = 5,
        CEPH_MDS_SESSION_RECONNECTING = 6
 };