From: Sage Weil Date: Tue, 14 Oct 2014 19:41:48 +0000 (-0700) Subject: msg/simple: do not stop_and_wait on mark_down X-Git-Tag: v0.87.1~31^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=300d4c6ff7e998dba0c67f6dde746dc23d681397;p=ceph.git msg/simple: do not stop_and_wait on mark_down We originally blocked in mark_down for fast dispatch threads to complete to avoid various races in the code. Most of these were in the OSD itself, where we were not prepared to get messges on connections that had no attached session. Since then, the OSD checks have been cleaned up to handle this. There were other races we were worried about too, but the details have been lost in the depths of time. Instead, take the other route: make mark_down never block on dispatch. This lets us remove the special case that was added in order to cope with fast dispatch calling mark_down on itself. Now, the only stop_and_wait() user is the shutdown sequence. Signed-off-by: Sage Weil (cherry picked from commit 00907e032011b9d2acd16ea588555cf379830814) --- diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc index 32e20db3f156b..fc16f459b4872 100644 --- a/src/msg/Pipe.cc +++ b/src/msg/Pipe.cc @@ -245,8 +245,7 @@ void *Pipe::DelayedDelivery::entry() void Pipe::DelayedDelivery::stop_fast_dispatching() { Mutex::Locker l(delay_lock); stop_fast_dispatching_flag = true; - // we can't block if we're the delay thread; see Pipe::stop_and_wait() - while (delay_dispatching && !am_self()) + while (delay_dispatching) delay_cond.Wait(delay_lock); } @@ -1447,18 +1446,11 @@ void Pipe::stop_and_wait() t.sleep(); } - // HACK: we work around an annoying deadlock here. If the fast - // dispatch method calls mark_down() on itself, it can block here - // waiting for the reader_dispatching flag to clear... which will - // clearly never happen. Avoid the situation by skipping the wait - // if we are marking our *own* connect down. Do the same for the - // delayed dispatch thread. if (delay_thread) { delay_thread->stop_fast_dispatching(); } while (reader_running && - reader_dispatching && - !reader_thread.am_self()) + reader_dispatching) cond.Wait(pipe_lock); } diff --git a/src/msg/SimpleMessenger.cc b/src/msg/SimpleMessenger.cc index cf6bc73711bad..edc8f0fe598a7 100644 --- a/src/msg/SimpleMessenger.cc +++ b/src/msg/SimpleMessenger.cc @@ -581,7 +581,7 @@ void SimpleMessenger::mark_down_all() Pipe *p = *q; ldout(cct,5) << "mark_down_all accepting_pipe " << p << dendl; p->pipe_lock.Lock(); - p->stop_and_wait(); + p->stop(); PipeConnectionRef con = p->connection_state; if (con && con->clear_pipe(p)) dispatch_queue.queue_reset(con.get()); @@ -596,7 +596,7 @@ void SimpleMessenger::mark_down_all() rank_pipe.erase(it); p->unregister_pipe(); p->pipe_lock.Lock(); - p->stop_and_wait(); + p->stop(); PipeConnectionRef con = p->connection_state; if (con && con->clear_pipe(p)) dispatch_queue.queue_reset(con.get()); @@ -613,7 +613,7 @@ void SimpleMessenger::mark_down(const entity_addr_t& addr) ldout(cct,1) << "mark_down " << addr << " -- " << p << dendl; p->unregister_pipe(); p->pipe_lock.Lock(); - p->stop_and_wait(); + p->stop(); if (p->connection_state) { // generate a reset event for the caller in this case, even // though they asked for it, since this is the addr-based (and @@ -640,7 +640,7 @@ void SimpleMessenger::mark_down(Connection *con) assert(p->msgr == this); p->unregister_pipe(); p->pipe_lock.Lock(); - p->stop_and_wait(); + p->stop(); if (p->connection_state) { // do not generate a reset event for the caller in this case, // since they asked for it.