]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
libceph: handle dead tcp connections during connection negotiation historic/wip-7139
authorIlya Dryomov <ilya.dryomov@inktank.com>
Thu, 16 Jan 2014 17:30:48 +0000 (19:30 +0200)
committerIlya Dryomov <ilya.dryomov@inktank.com>
Tue, 21 Jan 2014 10:40:17 +0000 (12:40 +0200)
Keepalive mechanism that we are currently using doesn't handle dead
(e.g. half-open in RFC 793 sense) TCP connections: a) it's based on
pending ceph_osd_requests which are not necessarily present, and b)
keepalive byte is only sent if connection is in CON_STATE_OPEN state
because of protocol restrictions.  This does not cover connection
handshake and negotiation stages and can lead to kernel client hanging
forever.  Fix it by forcibly resetting the connection if after two
mount_timeouts we still haven't transitioned to CON_STATE_OPEN.

Fixes: http://tracker.ceph.com/issues/7139
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
include/linux/ceph/messenger.h
net/ceph/ceph_common.c
net/ceph/messenger.c

index 20ee8b63a96848ad1bc63fb29ce97c853502d700..25d3ec8bcddeb0f4e2f5549fde68a4afaec25c7c 100644 (file)
@@ -62,6 +62,8 @@ struct ceph_messenger {
 
        u64 supported_features;
        u64 required_features;
+
+       unsigned long connect_timeout;  /* jiffies */
 };
 
 enum ceph_msg_data_type {
@@ -240,6 +242,8 @@ struct ceph_connection {
 
        struct delayed_work work;           /* send|recv work */
        unsigned long       delay;          /* current delay interval */
+
+       struct delayed_work connect_timeout_work;
 };
 
 
@@ -257,6 +261,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr,
                        struct ceph_entity_addr *myaddr,
                        u64 supported_features,
                        u64 required_features,
+                       unsigned long connect_timeout,
                        bool nocrc);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
index 67d7721d237e1638c21bc62da98f7275a9991026..bc41e5a392a4b0b738ffab507588d3178b72b583 100644 (file)
@@ -511,6 +511,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
        ceph_messenger_init(&client->msgr, myaddr,
                client->supported_features,
                client->required_features,
+               2 * client->options->mount_timeout * HZ,
                ceph_test_opt(client, NOCRC));
 
        /* subsystems */
index c7e0143d24f1f48f7a504e1f37be44e28a3ba9f6..65dea6c8c6f8a79b5c0858506b1ff24bda1722e3 100644 (file)
@@ -385,6 +385,16 @@ static void con_fault_raise(struct ceph_connection *con)
        queue_con(con);
 }
 
+static void handle_connect_timeout(struct work_struct *work)
+{
+       struct ceph_connection *con = container_of(work,
+                                                  struct ceph_connection,
+                                                  connect_timeout_work.work);
+
+       dout("%s connect timeout\n", __func__);
+       con_fault_raise(con);
+}
+
 
 /*
  * socket callback functions
@@ -447,6 +457,18 @@ static void ceph_sock_state_change(struct sock *sk)
        case TCP_ESTABLISHED:
                dout("%s TCP_ESTABLISHED\n", __func__);
                con_sock_state_connected(con);
+               /*
+                * If not told otherwise, arm connect_timeout hammer to
+                * be able to deal with half-open (in RFC 793 sense)
+                * sockets in the interim before we transition to
+                * CON_STATE_OPEN and regular request keepalive
+                * mechanism (ceph_con_keepalive()) kicks in.
+                */
+               if (con->msgr->connect_timeout) {
+                       dout("arming connect_timeout hammer\n");
+                       schedule_delayed_work(&con->connect_timeout_work,
+                                             con->msgr->connect_timeout);
+               }
                queue_con(con);
                break;
        default:        /* Everything else is uninteresting */
@@ -586,6 +608,13 @@ static int con_close_socket(struct ceph_connection *con)
        int rc = 0;
 
        dout("con_close_socket on %p sock %p\n", con, con->sock);
+
+       /*
+        * Sync with con_fault_raise() in handle_connect_timeout() to
+        * make sure that SOCK_CLOSED is going to be cleared for good.
+        */
+       cancel_delayed_work_sync(&con->connect_timeout_work);
+
        if (con->sock) {
                rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
                sock_release(con->sock);
@@ -595,8 +624,8 @@ static int con_close_socket(struct ceph_connection *con)
        /*
         * Forcibly clear the SOCK_CLOSED flag.  It gets set
         * independent of the connection mutex, and we could have
-        * received a socket close event before we had the chance to
-        * shut the socket down.
+        * received a socket close and/or connect_timeout event
+        * before we had the chance to shut the socket down.
         */
        con_flag_clear(con, CON_FLAG_SOCK_CLOSED);
 
@@ -725,6 +754,7 @@ void ceph_con_init(struct ceph_connection *con, void *private,
        INIT_LIST_HEAD(&con->out_queue);
        INIT_LIST_HEAD(&con->out_sent);
        INIT_DELAYED_WORK(&con->work, con_work);
+       INIT_DELAYED_WORK(&con->connect_timeout_work, handle_connect_timeout);
 
        con->state = CON_STATE_CLOSED;
 }
@@ -2076,6 +2106,7 @@ static int process_connect(struct ceph_connection *con)
 
                WARN_ON(con->state != CON_STATE_NEGOTIATING);
                con->state = CON_STATE_OPEN;
+
                con->auth_retry = 0;    /* we authenticated; clear flag */
                con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
                con->connect_seq++;
@@ -2092,6 +2123,11 @@ static int process_connect(struct ceph_connection *con)
 
                con->delay = 0;      /* reset backoff memory */
 
+               if (con->msgr->connect_timeout) {
+                       dout("disarming connect_timeout hammer\n");
+                       cancel_delayed_work(&con->connect_timeout_work);
+               }
+
                if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
                        prepare_write_seq(con);
                        prepare_read_seq(con);
@@ -2866,10 +2902,12 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
                        struct ceph_entity_addr *myaddr,
                        u64 supported_features,
                        u64 required_features,
+                       unsigned long connect_timeout,
                        bool nocrc)
 {
        msgr->supported_features = supported_features;
        msgr->required_features = required_features;
+       msgr->connect_timeout = connect_timeout;
 
        spin_lock_init(&msgr->global_seq_lock);