git.apps.os.sepia.ceph.com Git - ceph.git/commit
osd: fix race between op_wq and context_queue 24761/head
author Sage Weil <sage@redhat.com>
Thu, 25 Oct 2018 19:24:02 +0000 (14:24 -0500)
committer Sage Weil <sage@redhat.com>
Fri, 26 Oct 2018 15:27:09 +0000 (10:27 -0500)
commit d76789444cc09df822a9b83097456e63cb0e030c
tree 7208cd108dd71a113578971422cf59fb5397b961
parent 375091f9f9f0dbf36e31ff8afe56af170353749d
osd: fix race between op_wq and context_queue

        ThreadA                                                 ThreadB
  sdata->shard_lock.Lock();
  if (sdata->pqueue->empty() &&
     !(is_smallest_thread_index && !sdata->context_queue.empty())) {

                                                                    void queue(list<Context *>& ls) {
                                                                       bool empty = false;
                                                                       {
                                                                         std::scoped_lock l(q_mutex);
                                                                         if (q.empty()) {
                                                                           q.swap(ls);
                                                                           empty = true;
                                                                         } else {
                                                                           q.insert(q.end(), ls.begin(), ls.end());
                                                                         }
                                                                       }

                                                                       if (empty) {
                                                                         mutex.Lock();
                                                                         cond.Signal();
                                                                         mutex.Unlock();
                                                                       }
                                                                    }

     sdata->sdata_wait_lock.Lock();
    if (!sdata->stop_waiting) {

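In the interleaving above, ThreadA sees an empty context_queue while holding
shard_lock, and ThreadB then queues new contexts and signals before ThreadA
has taken sdata_wait_lock, so ThreadA can miss that signal.

Fix by simply rechecking that context_queue is empty after taking the
wait lock.  We still check it without taking that lock to keep the hot/busy
path fast (we avoid the wait lock in general) at the expense of taking
the context_queue lock (q_mutex) twice in the idle/wait path (where we don't
care so much about additional latency/cycles).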

Fixes: http://tracker.ceph.com/issues/36473
Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSD.cc