It's possible to process a watch reconnect op, which sets Watch::conn to
the new connection, before the ms_handle_reset path tries to disconnect
the old one. Since WatchConState::reset() was blindly disconnecting
whatever connection was "current", we could race and disconnect the new
connection instead of the old one.
Fix this by telling reset() which connection is being reset, so that it
only disconnects watches that are still attached to that connection.
Fixes: http://tracker.ceph.com/issues/15441
Signed-off-by: Sage Weil <sage@redhat.com>
dout(1) << "ms_handle_reset con " << con << " session " << session << dendl;
if (!session)
return false;
- session->wstate.reset();
+ session->wstate.reset(con);
session->con.reset(NULL); // break con <-> session ref cycle
session_handle_reset(session);
session->put();
watches.erase(watch);
}
-void WatchConState::reset()
+void WatchConState::reset(Connection *con)
{
set<WatchRef> _watches;
{
boost::intrusive_ptr<ReplicatedPG> pg((*i)->get_pg());
pg->lock();
if (!(*i)->is_discarded()) {
- (*i)->disconnect();
+ if ((*i)->is_connected(con)) {
+ (*i)->disconnect();
+ } else {
+ generic_derr << __func__ << " not still connected to " << (*i) << dendl;
+ }
}
pg->unlock();
}
/// Returns true when this watch holds any connection, i.e. conn is non-null.
bool is_connected() {
return conn.get() != NULL;
}
+ bool is_connected(Connection *con) {
+ return conn.get() == con;
+ }
/// NOTE: must be called with pg lock held
~Watch();
);
/// Called on session reset, disconnects watchers
- void reset();
+ void reset(Connection *con);
};
#endif