]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: unlock sdata_op_ordering_lock with sdata_lock hold to avoid missing wakeup signal 15891/head
authorMing Lin <ming.lin@alibaba-inc.com>
Fri, 23 Jun 2017 17:28:19 +0000 (10:28 -0700)
committerMing Lin <ming.lin@alibaba-inc.com>
Fri, 23 Jun 2017 17:30:35 +0000 (10:30 -0700)
We are running mysql on top of rbd. sysbench qps occasionally drops to zero
with the INSERT benchmark.

Debug code captured >2s latency between PG::queue_op() and OSD::dequeue_op().
We finally found out that the latency came from below code in OSD::ShardedOpWQ::_process(),

sdata->sdata_cond.WaitInterval(sdata->sdata_lock,
      utime_t(osd->cct->_conf->threadpool_empty_queue_max_wait, 0));

"threadpool_empty_queue_max_wait" is 2s by default.

Normally, it should not sleep for 2s since the comming IO requests will wakeup it.
But there is a small timing window that it missed the wakeup signal actually.
For example,

     msgr-worker-0 thread                    tp_osd_tp thread
     OSD::ShardedOpWQ::_enqueue              OSD::ShardedOpWQ::_process
     ---------------------------             ---------------------------
T1:                                          sdata_op_ordering_lock.Lock()
T2:  sdata_op_ordering_lock.Lock()
                                             "queue empty"
                                             sdata_op_ordering_lock.Unlock()
     "insert op"
     sdata_op_ordering_lock.Unlock()

T3:  sdata_lock.Lock()
T4:                                          sdata_lock.Lock()
     "send wakeup signal"
     sdata_lock.Unock()
                                             // here the wakeup signal has no effect actually
                                             // becuase it has not slept yet.

                                             // then, it sleeps.
                                             WaitInterval(2s)

This patch unlocks sdata_op_ordering_lock with sdata_lock hold in OSD::ShardedOpWQ::_process(),
then the timeline becomes,

     msgr-worker-0 thread                    tp_osd_tp thread
     OSD::ShardedOpWQ::_enqueue              OSD::ShardedOpWQ::_process
     ---------------------------             ---------------------------
T1:                                          sdata_op_ordering_lock.Lock()
T2:  sdata_op_ordering_lock.Lock()
                                             "queue empty"
                                             sdata_lock.Lock()
T3:                                          sdata_op_ordering_lock.Unlock()
     "insert op"
     sdata_op_ordering_lock.Unlock()
     sdata_lock.Lock()

T4:                                          WaitInterval(2s) -> it actually unlocks sdata_lock
     "send wakeup signal"
     sdata_lock.Unock()
                                             //got signal, wakeup immediately

With this one line change, we can avoid occasional high latency.

Signed-off-by: Ming Lin <ming.lin@alibaba-inc.com>
src/osd/OSD.cc

index fb77b0777bca21d4cdcfbb6a4b681cf6a96b13d7..8e860a37e31cf488656e26efcfc4ed598cab19c6 100644 (file)
@@ -9853,10 +9853,10 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb)
   if (sdata->pqueue->empty()) {
     dout(20) << __func__ << " empty q, waiting" << dendl;
     // optimistically sleep a moment; maybe another work item will come along.
-    sdata->sdata_op_ordering_lock.Unlock();
     osd->cct->get_heartbeat_map()->reset_timeout(hb,
       osd->cct->_conf->threadpool_default_timeout, 0);
     sdata->sdata_lock.Lock();
+    sdata->sdata_op_ordering_lock.Unlock();
     sdata->sdata_cond.WaitInterval(sdata->sdata_lock,
       utime_t(osd->cct->_conf->threadpool_empty_queue_max_wait, 0));
     sdata->sdata_lock.Unlock();