From: Kefu Chai Date: Tue, 28 Mar 2017 03:29:20 +0000 (+0800) Subject: mon: acquire lock when accessing mon->session_map X-Git-Tag: v12.0.2~256^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=df6dafe1bb8404da70e7ddb333e0007578af344a;p=ceph.git mon: acquire lock when accessing mon->session_map we will access the mon->session_map for sending the osd-pg-creates messages when finishing osdmapping in OSDMonitor, this could happen in another thread without the protection of Monitor::lock, or in the same thread already guarded by Monitor::lock. so instead of changing Monitor::lock to a recursive lock, a new lock is introduced to protect session_map. Signed-off-by: Kefu Chai --- diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index d41cd4d09b63..657d22cff4cc 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -3552,6 +3552,7 @@ void Monitor::remove_session(MonSession *s) void Monitor::remove_all_sessions() { + Mutex::Locker l(session_map_lock); while (!session_map.sessions.empty()) { MonSession *s = session_map.sessions.front(); remove_session(s); @@ -3601,8 +3602,10 @@ void Monitor::waitlist_or_zap_client(MonOpRequestRef op) con->mark_down(); // proxied sessions aren't registered and don't have a con; don't remove // those. - if (!s->proxy_con) + if (!s->proxy_con) { + Mutex::Locker l(session_map_lock); remove_session(s); + } op->mark_zap(); } } @@ -3636,7 +3639,10 @@ void Monitor::_ms_dispatch(Message *m) } ConnectionRef con = m->get_connection(); - s = session_map.new_session(m->get_source_inst(), con.get()); + { + Mutex::Locker l(session_map_lock); + s = session_map.new_session(m->get_source_inst(), con.get()); + } assert(s); con->set_priv(s->get()); dout(10) << __func__ << " new session " << s << " " << *s << dendl; @@ -4407,6 +4413,7 @@ void Monitor::handle_subscribe(MonOpRequestRef op) for (map::iterator it = s->sub_map.begin(); it != s->sub_map.end(); ) { if (it->first != p->first && logmon()->sub_name_to_id(it->first) >= 0) { + Mutex::Locker l(session_map_lock); session_map.remove_sub((it++)->second); } else { ++it; @@ -4414,9 +4421,12 @@ void Monitor::handle_subscribe(MonOpRequestRef op) } } - session_map.add_update_sub(s, p->first, p->second.start, - p->second.flags & CEPH_SUBSCRIBE_ONETIME, - m->get_connection()->has_feature(CEPH_FEATURE_INCSUBOSDMAP)); + { + Mutex::Locker l(session_map_lock); + session_map.add_update_sub(s, p->first, p->second.start, + p->second.flags & CEPH_SUBSCRIBE_ONETIME, + m->get_connection()->has_feature(CEPH_FEATURE_INCSUBOSDMAP)); + } if (p->first.compare(0, 6, "mdsmap") == 0 || p->first.compare(0, 5, "fsmap") == 0) { dout(10) << __func__ << ": MDS sub '" << p->first << "'" << dendl; @@ -4523,8 +4533,10 @@ bool Monitor::ms_handle_reset(Connection *con) Mutex::Locker l(lock); dout(10) << "reset/close on session " << s->inst << dendl; - if (!s->closed) + if (!s->closed) { + Mutex::Locker l(session_map_lock); remove_session(s); + } s->put(); return true; } @@ -4971,41 +4983,43 @@ void Monitor::tick() // trim sessions utime_t now = ceph_clock_now(); - xlist::iterator p = session_map.sessions.begin(); + { + Mutex::Locker l(session_map_lock); + auto p = session_map.sessions.begin(); - bool out_for_too_long = (!exited_quorum.is_zero() - && now > (exited_quorum + 2*g_conf->mon_lease)); + bool out_for_too_long = (!exited_quorum.is_zero() && + now > (exited_quorum + 2*g_conf->mon_lease)); - while (!p.end()) { - MonSession *s = *p; - ++p; + while (!p.end()) { + MonSession *s = *p; + ++p; - // don't trim monitors - if (s->inst.name.is_mon()) - continue; + // don't trim monitors + if (s->inst.name.is_mon()) + continue; - if (s->session_timeout < now && s->con) { - // check keepalive, too - s->session_timeout = s->con->get_last_keepalive(); - s->session_timeout += g_conf->mon_session_timeout; - } - if (s->session_timeout < now) { - dout(10) << " trimming session " << s->con << " " << s->inst - << " (timeout " << s->session_timeout - << " < now " << now << ")" << dendl; - } else if (out_for_too_long) { - // boot the client Session because we've taken too long getting back in - dout(10) << " trimming session " << s->con << " " << s->inst - << " because we've been out of quorum too long" << dendl; - } else { - continue; - } + if (s->session_timeout < now && s->con) { + // check keepalive, too + s->session_timeout = s->con->get_last_keepalive(); + s->session_timeout += g_conf->mon_session_timeout; + } + if (s->session_timeout < now) { + dout(10) << " trimming session " << s->con << " " << s->inst + << " (timeout " << s->session_timeout + << " < now " << now << ")" << dendl; + } else if (out_for_too_long) { + // boot the client Session because we've taken too long getting back in + dout(10) << " trimming session " << s->con << " " << s->inst + << " because we've been out of quorum too long" << dendl; + } else { + continue; + } - s->con->mark_down(); - remove_session(s); - logger->inc(l_mon_session_trim); + s->con->mark_down(); + remove_session(s); + logger->inc(l_mon_session_trim); + } } - sync_trim_providers(); if (!maybe_wait_for_quorum.empty()) { diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 96d8f2582f39..fa230c55749a 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -654,8 +654,14 @@ public: // -- sessions -- MonSessionMap session_map; + Mutex session_map_lock{"Monitor::session_map_lock"}; AdminSocketHook *admin_hook; + template + void with_session_map(Func&& func) { + Mutex::Locker l(session_map_lock); + std::forward(func)(session_map); + } void send_latest_monmap(Connection *con); // messages diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 046cb58eeffd..6633009b3ef8 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -767,12 +767,14 @@ int MonmapMonitor::get_monmap(bufferlist &bl) void MonmapMonitor::check_subs() { const string type = "monmap"; - auto subs = mon->session_map.subs.find(type); - if (subs == mon->session_map.subs.end()) - return; - for (auto sub : *subs->second) { - check_sub(sub); - } + mon->with_session_map([this, &type](const MonSessionMap& session_map) { + auto subs = session_map.subs.find(type); + if (subs == session_map.subs.end()) + return; + for (auto sub : *subs->second) { + check_sub(sub); + } + }); } void MonmapMonitor::check_sub(Subscription *sub) @@ -783,9 +785,12 @@ void MonmapMonitor::check_sub(Subscription *sub) << " have " << epoch << dendl; if (sub->next <= epoch) { mon->send_latest_monmap(sub->session->con.get()); - if (sub->onetime) - mon->session_map.remove_sub(sub); - else + if (sub->onetime) { + mon->with_session_map([this, sub](MonSessionMap& session_map) { + session_map.remove_sub(sub); + }); + } else { sub->next = epoch + 1; + } } } diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 547d510aa805..001dc34af8d4 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -1915,17 +1915,20 @@ int PGMonitor::dump_stuck_pg_stats(stringstream &ds, void PGMonitor::check_subs() { dout(10) << __func__ << dendl; - string type = "osd_pg_creates"; - if (mon->session_map.subs.count(type) == 0) - return; - - xlist::iterator p = mon->session_map.subs[type]->begin(); - while (!p.end()) { - Subscription *sub = *p; - ++p; - dout(20) << __func__ << " .. " << sub->session->inst << dendl; - check_sub(sub); - } + const string type = "osd_pg_creates"; + + mon->with_session_map([this, &type](const MonSessionMap& session_map) { + if (mon->session_map.subs.count(type) == 0) + return; + + auto p = mon->session_map.subs[type]->begin(); + while (!p.end()) { + Subscription *sub = *p; + ++p; + dout(20) << __func__ << " .. " << sub->session->inst << dendl; + check_sub(sub); + } + }); } void PGMonitor::check_sub(Subscription *sub)