From c04fd4210f660c6458d5ecd9259429aff9fa4881 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 13 Apr 2016 15:22:30 -0400 Subject: [PATCH] osd: fix watch reconnect race It's possible to process a watch reconnect op, setting Watch::conn, before the ms_handle_reset path tries to disconnect the old one. Since reset() was blindly disconnecting the "current" connection, we could race and disconnect the new con instead of the old one. Fix this by specifying which con to disconnect. Fixes: http://tracker.ceph.com/issues/15441 Signed-off-by: Sage Weil --- src/osd/OSD.cc | 2 +- src/osd/Watch.cc | 8 ++++++-- src/osd/Watch.h | 5 ++++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 7c260ff39aeae..d37bc7d70bd38 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4662,7 +4662,7 @@ bool OSD::ms_handle_reset(Connection *con) dout(1) << "ms_handle_reset con " << con << " session " << session << dendl; if (!session) return false; - session->wstate.reset(); + session->wstate.reset(con); session->con.reset(NULL); // break con <-> session ref cycle session_handle_reset(session); session->put(); diff --git a/src/osd/Watch.cc b/src/osd/Watch.cc index f4411baf7feee..43846c03a16a8 100644 --- a/src/osd/Watch.cc +++ b/src/osd/Watch.cc @@ -513,7 +513,7 @@ void WatchConState::removeWatch(WatchRef watch) watches.erase(watch); } -void WatchConState::reset() +void WatchConState::reset(Connection *con) { set _watches; { @@ -526,7 +526,11 @@ void WatchConState::reset() boost::intrusive_ptr pg((*i)->get_pg()); pg->lock(); if (!(*i)->is_discarded()) { - (*i)->disconnect(); + if ((*i)->is_connected(con)) { + (*i)->disconnect(); + } else { + generic_derr << __func__ << " not still connected to " << (*i) << dendl; + } } pg->unlock(); } diff --git a/src/osd/Watch.h b/src/osd/Watch.h index 6e4ec37a6b51f..c6843ee785de3 100644 --- a/src/osd/Watch.h +++ b/src/osd/Watch.h @@ -202,6 +202,9 @@ public: bool is_connected() { return conn.get() != NULL; } + bool is_connected(Connection *con) { + return conn.get() == con; + } /// NOTE: must be called with pg lock held ~Watch(); @@ -290,7 +293,7 @@ public: ); /// Called on session reset, disconnects watchers - void reset(); + void reset(Connection *con); }; #endif -- 2.39.5