From: Haomai Wang Date: Tue, 4 Apr 2017 09:28:13 +0000 (+0800) Subject: msg/async: Postpone bind if network stack is not ready X-Git-Tag: ses5-milestone6~9^2~21^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e94ba0d2ae4d7e1df41f3de406f27fc15ac5473a;p=ceph.git msg/async: Postpone bind if network stack is not ready RDMAStack shouldn't access hardware from the parent process. The only reason it does so is that bind is called before the fork. After this patch, the bind is postponed until the NetworkStack reports that it is ready to bind. is_ready() always returns true for every NetworkStack type except RDMAStack, which returns true only after the fork (after AsyncMessenger::ready() is called). This patch is based on a patch by Haomai Wang Issue: 995322 Signed-off-by: Amir Vadai (cherry picked from commit a5b87e2fb83adec2b0fe060d3b22acfb1b9db1ed) Change-Id: I5f8be4f93a5fe0f2a7ef3b29d755be52b0790c06 --- diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc index 814d058c8f73..122473179c3c 100644 --- a/src/msg/async/AsyncMessenger.cc +++ b/src/msg/async/AsyncMessenger.cc @@ -291,6 +291,15 @@ void AsyncMessenger::ready() { ldout(cct,10) << __func__ << " " << get_myaddr() << dendl; + stack->ready(); + if (pending_bind) { + int err = bind(pending_bind_addr); + if (err) { + lderr(cct) << __func__ << " postponed bind failed" << dendl; + ceph_abort(); + } + } + Mutex::Locker l(lock); for (auto &&p : processors) p->start(); @@ -320,12 +329,23 @@ int AsyncMessenger::shutdown() int AsyncMessenger::bind(const entity_addr_t &bind_addr) { lock.Lock(); - if (started) { + + if (!pending_bind && started) { ldout(cct,10) << __func__ << " already started" << dendl; lock.Unlock(); return -1; } + ldout(cct,10) << __func__ << " bind " << bind_addr << dendl; + + if (!stack->is_ready()) { + ldout(cct, 10) << __func__ << " Network Stack is not ready for bind yet - postponed" << dendl; + pending_bind_addr = bind_addr; + pending_bind = 
true; + lock.Unlock(); + return 0; + } + lock.Unlock(); // bind to a socket diff --git a/src/msg/async/AsyncMessenger.h b/src/msg/async/AsyncMessenger.h index c234a70a442a..01af51104959 100644 --- a/src/msg/async/AsyncMessenger.h +++ b/src/msg/async/AsyncMessenger.h @@ -236,6 +236,17 @@ private: // maybe this should be protected by the lock? bool need_addr; + /** + * set to bind address if bind was called before NetworkStack was ready to + * bind + */ + entity_addr_t pending_bind_addr; + + /** + * false; set to true if a pending bind exists + */ + bool pending_bind = false; + /** * The following aren't lock-protected since you shouldn't be able to race * the only writers. diff --git a/src/msg/async/Stack.h b/src/msg/async/Stack.h index 2ddfa8c6754f..3b7adca35da2 100644 --- a/src/msg/async/Stack.h +++ b/src/msg/async/Stack.h @@ -287,7 +287,6 @@ class NetworkStack : public CephContext::ForkWatcher { protected: CephContext *cct; vector workers; - // Used to indicate whether thread started explicit NetworkStack(CephContext *c, const string &t); public: @@ -337,6 +336,8 @@ class NetworkStack : public CephContext::ForkWatcher { start(); } + virtual bool is_ready() { return true; }; + virtual void ready() { }; }; #endif //CEPH_MSG_ASYNC_STACK_H diff --git a/src/msg/async/rdma/RDMAStack.h b/src/msg/async/rdma/RDMAStack.h index 7afc6c25dd4f..aa459731ce92 100644 --- a/src/msg/async/rdma/RDMAStack.h +++ b/src/msg/async/rdma/RDMAStack.h @@ -297,6 +297,8 @@ class RDMAStack : public NetworkStack { RDMADispatcher *dispatcher; PerfCounters *perf_counter; + std::atomic fork_finished = {false}; + public: explicit RDMAStack(CephContext *cct, const string &t); virtual ~RDMAStack(); @@ -306,6 +308,9 @@ class RDMAStack : public NetworkStack { virtual void spawn_worker(unsigned i, std::function &&func) override; virtual void join_worker(unsigned i) override; RDMADispatcher *get_dispatcher() { return dispatcher; } + + virtual bool is_ready() override { return fork_finished.load(); }; + 
virtual void ready() override { fork_finished = true; }; }; #endif