git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: fix pg_stats_queue lock protection
author: Sage Weil <sage@redhat.com>
Tue, 15 Sep 2015 20:34:34 +0000 (16:34 -0400)
committer: Sage Weil <sage@redhat.com>
Sat, 14 Nov 2015 03:42:42 +0000 (22:42 -0500)
We are indirectly relying on osd_lock, but that may no longer
work for us in the future.  Use the stats lock (pg_stat_queue_lock) instead.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSD.cc

index e1b4a90e4e7e759522304ad3b9c16bd38a97a98a..f194628f7ea40ccf435322445be7a404193765f1 100644 (file)
@@ -3986,23 +3986,25 @@ void OSD::tick()
     heartbeat_lock.Unlock();
 
     // mon report?
+    bool reset = false;
+    bool report = false;
     utime_t now = ceph_clock_now(cct);
+    pg_stat_queue_lock.Lock();
     if (outstanding_pg_stats &&
        (now - stats_ack_timeout) > last_pg_stats_ack) {
       dout(1) << __func__ << " mon hasn't acked PGStats in "
              << now - last_pg_stats_ack
              << " seconds, reconnecting elsewhere" << dendl;
-      monc->reopen_session();
+      reset = true;
       last_pg_stats_ack = ceph_clock_now(cct);  // reset clock
       last_pg_stats_sent = utime_t();
       stats_ack_timeout =
        MAX(g_conf->osd_mon_ack_timeout,
            stats_ack_timeout * g_conf->osd_stats_ack_timeout_factor);
-      outstanding_pg_stats = 0;
     }
     if (now - last_pg_stats_sent > cct->_conf->osd_mon_report_interval_max) {
       osd_stat_updated = true;
-      do_mon_report();
+      report = true;
     } else {
       double backoff = stats_ack_timeout / g_conf->osd_mon_ack_timeout;
       double adjusted_min = cct->_conf->osd_mon_report_interval_min * backoff;
@@ -4010,9 +4012,15 @@ void OSD::tick()
        dout(20) << __func__ << " stats backoff " << backoff
                 << " adjusted_min " << adjusted_min << " - sending report"
                 << dendl;
-       do_mon_report();
+       report = true;
       }
     }
+    pg_stat_queue_lock.Unlock();
+
+    if (reset)
+      monc->reopen_session();
+    else if (report)
+      do_mon_report();
 
     map_lock.put_read();
   }
@@ -4367,6 +4375,12 @@ void OSD::ms_handle_connect(Connection *con)
     if (is_stopping())
       return;
     dout(10) << "ms_handle_connect on mon" << dendl;
+
+    // reset pg stats state
+    pg_stat_queue_lock.Lock();
+    outstanding_pg_stats = 0;
+    pg_stat_queue_lock.Unlock();
+
     if (is_booting()) {
       start_boot();
     } else {
@@ -4812,6 +4826,8 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack)
     return;
   }
 
+  pg_stat_queue_lock.Lock();
+
   last_pg_stats_ack = ceph_clock_now(cct);
 
   // decay timeout slowly (analogous to TCP)
@@ -4820,8 +4836,6 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack)
        stats_ack_timeout * g_conf->osd_stats_ack_timeout_decay);
   dout(20) << __func__ << "  timeout now " << stats_ack_timeout << dendl;
 
-  pg_stat_queue_lock.Lock();
-
   if (ack->get_tid() > pg_stat_tid_flushed) {
     pg_stat_tid_flushed = ack->get_tid();
     pg_stat_queue_cond.Signal();