From: Sage Weil Date: Tue, 15 Sep 2015 20:34:34 +0000 (-0400) Subject: osd: fix pg_stats_queue lock protection X-Git-Tag: v10.0.1~26^2~57 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=093478afa96bf0c99e2ce4ee98e4ab04bdccc54c;p=ceph.git osd: fix pg_stats_queue lock protection We are indirectly relying on osd_lock, but that may no longer work for us in the future. Use the stats lock instead. Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index e1b4a90e4e7e..f194628f7ea4 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3986,23 +3986,25 @@ void OSD::tick() heartbeat_lock.Unlock(); // mon report? + bool reset = false; + bool report = false; utime_t now = ceph_clock_now(cct); + pg_stat_queue_lock.Lock(); if (outstanding_pg_stats && (now - stats_ack_timeout) > last_pg_stats_ack) { dout(1) << __func__ << " mon hasn't acked PGStats in " << now - last_pg_stats_ack << " seconds, reconnecting elsewhere" << dendl; - monc->reopen_session(); + reset = true; last_pg_stats_ack = ceph_clock_now(cct); // reset clock last_pg_stats_sent = utime_t(); stats_ack_timeout = MAX(g_conf->osd_mon_ack_timeout, stats_ack_timeout * g_conf->osd_stats_ack_timeout_factor); - outstanding_pg_stats = 0; } if (now - last_pg_stats_sent > cct->_conf->osd_mon_report_interval_max) { osd_stat_updated = true; - do_mon_report(); + report = true; } else { double backoff = stats_ack_timeout / g_conf->osd_mon_ack_timeout; double adjusted_min = cct->_conf->osd_mon_report_interval_min * backoff; @@ -4010,9 +4012,15 @@ void OSD::tick() dout(20) << __func__ << " stats backoff " << backoff << " adjusted_min " << adjusted_min << " - sending report" << dendl; - do_mon_report(); + report = true; } } + pg_stat_queue_lock.Unlock(); + + if (reset) + monc->reopen_session(); + else if (report) + do_mon_report(); map_lock.put_read(); } @@ -4367,6 +4375,12 @@ void OSD::ms_handle_connect(Connection *con) if (is_stopping()) return; dout(10) << "ms_handle_connect on mon" << dendl; + + // reset pg stats state + pg_stat_queue_lock.Lock(); + outstanding_pg_stats = 0; + pg_stat_queue_lock.Unlock(); + if (is_booting()) { start_boot(); } else { @@ -4812,6 +4826,8 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) return; } + pg_stat_queue_lock.Lock(); + last_pg_stats_ack = ceph_clock_now(cct); // decay timeout slowly (analogous to TCP) @@ -4820,8 +4836,6 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) stats_ack_timeout * g_conf->osd_stats_ack_timeout_decay); dout(20) << __func__ << " timeout now " << stats_ack_timeout << dendl; - pg_stat_queue_lock.Lock(); - if (ack->get_tid() > pg_stat_tid_flushed) { pg_stat_tid_flushed = ack->get_tid(); pg_stat_queue_cond.Signal();