From 2d4dca757eaa7572ed4d8a1c798c9c09e5ada3d7 Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Tue, 18 Nov 2014 14:49:00 +0100 Subject: [PATCH] SimpleMessenger: Retry binding on addresses if binding fails If binding on an IP address fails, delay and retry. This happens mainly on IPv6 deployments. Due to DAD (Duplicate Address Detection) or SLAAC, the IPv6 address may not yet be available when the daemons start. Monitor daemons try to bind to a static IPv6 address; if that address is not available yet, the monitor fails to start. Fixes: #10029 --- src/common/config_opts.h | 2 + src/msg/simple/Accepter.cc | 91 ++++++++++++++++++++++++-------------- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index d34a147b4c9..bc00d6a2e21 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -128,6 +128,8 @@ OPTION(ms_dispatch_throttle_bytes, OPT_U64, 100 << 20) OPTION(ms_bind_ipv6, OPT_BOOL, false) OPTION(ms_bind_port_min, OPT_INT, 6800) OPTION(ms_bind_port_max, OPT_INT, 7300) +OPTION(ms_bind_retry_count, OPT_INT, 3) // If binding fails, how many times do we retry to bind +OPTION(ms_bind_retry_delay, OPT_INT, 5) // Delay between attempts to bind OPTION(ms_rwthread_stack_bytes, OPT_U64, 1024 << 10) OPTION(ms_tcp_read_timeout, OPT_U64, 900) OPTION(ms_pq_max_tokens_per_priority, OPT_U64, 16777216) diff --git a/src/msg/simple/Accepter.cc b/src/msg/simple/Accepter.cc index 7fb2baa56fb..7d989a93691 100644 --- a/src/msg/simple/Accepter.cc +++ b/src/msg/simple/Accepter.cc @@ -69,43 +69,66 @@ int Accepter::bind(const entity_addr_t &bind_addr, const set& avoid_ports) /* bind to port */ int rc = -1; - if (listen_addr.get_port()) { - // specific port - - // reuse addr+port when possible - int on = 1; - rc = ::setsockopt(listen_sd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); - if (rc < 0) { - lderr(msgr->cct) << "accepter.bind unable to setsockopt: " - << cpp_strerror(errno) << dendl; - 
return -errno; - } + int r = -1; - rc = ::bind(listen_sd, (struct sockaddr *) &listen_addr.ss_addr(), listen_addr.addr_size()); - if (rc < 0) { - lderr(msgr->cct) << "accepter.bind unable to bind to " << listen_addr.ss_addr() - << ": " << cpp_strerror(errno) << dendl; - return -errno; - } - } else { - // try a range of ports - for (int port = msgr->cct->_conf->ms_bind_port_min; port <= msgr->cct->_conf->ms_bind_port_max; port++) { - if (avoid_ports.count(port)) - continue; - listen_addr.set_port(port); - rc = ::bind(listen_sd, (struct sockaddr *) &listen_addr.ss_addr(), listen_addr.addr_size()); - if (rc == 0) - break; + for (int i = 0; i < conf->ms_bind_retry_count; i++) { + + if (i > 0) { + lderr(msgr->cct) << "accepter.bind was unable to bind. Trying again in " << conf->ms_bind_retry_delay << " seconds " << dendl; + sleep(conf->ms_bind_retry_delay); } - if (rc < 0) { - lderr(msgr->cct) << "accepter.bind unable to bind to " << listen_addr.ss_addr() - << " on any port in range " << msgr->cct->_conf->ms_bind_port_min - << "-" << msgr->cct->_conf->ms_bind_port_max - << ": " << cpp_strerror(errno) - << dendl; - return -errno; + + if (listen_addr.get_port()) { + // specific port + + // reuse addr+port when possible + int on = 1; + rc = ::setsockopt(listen_sd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + if (rc < 0) { + lderr(msgr->cct) << "accepter.bind unable to setsockopt: " + << cpp_strerror(errno) << dendl; + r = -errno; + continue; + } + + rc = ::bind(listen_sd, (struct sockaddr *) &listen_addr.ss_addr(), listen_addr.addr_size()); + if (rc < 0) { + lderr(msgr->cct) << "accepter.bind unable to bind to " << listen_addr.ss_addr() + << ": " << cpp_strerror(errno) << dendl; + r = -errno; + continue; + } + } else { + // try a range of ports + for (int port = msgr->cct->_conf->ms_bind_port_min; port <= msgr->cct->_conf->ms_bind_port_max; port++) { + if (avoid_ports.count(port)) + continue; + + listen_addr.set_port(port); + rc = ::bind(listen_sd, (struct sockaddr *) 
&listen_addr.ss_addr(), listen_addr.addr_size()); + if (rc == 0) + break; + } + if (rc < 0) { + lderr(msgr->cct) << "accepter.bind unable to bind to " << listen_addr.ss_addr() + << " on any port in range " << msgr->cct->_conf->ms_bind_port_min + << "-" << msgr->cct->_conf->ms_bind_port_max + << ": " << cpp_strerror(errno) + << dendl; + r = -errno; + continue; + } + ldout(msgr->cct,10) << "accepter.bind bound on random port " << listen_addr << dendl; } - ldout(msgr->cct,10) << "accepter.bind bound on random port " << listen_addr << dendl; + + if (rc == 0) + break; + } + + // It seems that binding completely failed, return with that exit status + if (rc < 0) { + lderr(msgr->cct) << "accepter.bind was unable to bind after " << conf->ms_bind_retry_count << " attempts: " << cpp_strerror(errno) << dendl; + return r; } // what port did we get? -- 2.47.3