From: Sage Weil Date: Fri, 24 Apr 2015 20:41:35 +0000 (-0700) Subject: os/newstore: fix _txc_aio_submit X-Git-Tag: v9.1.0~242^2~43 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4eca15a950794bbff24e293d708b36e994c17280;p=ceph.git os/newstore: fix _txc_aio_submit The aios may complete before _txc_aio_submit completes. In fact, the aio may complete, commit to the kv store, and then queue more wal aio's before we finish the loop. Move aios to a separate list to ensure we only submit them once and do not fight another CPU adjusting the list. Signed-off-by: Sage Weil --- diff --git a/src/os/newstore/NewStore.cc b/src/os/newstore/NewStore.cc index a9ece6da05a5..17109ba9645f 100644 --- a/src/os/newstore/NewStore.cc +++ b/src/os/newstore/NewStore.cc @@ -2056,7 +2056,7 @@ void NewStore::_txc_state_proc(TransContext *txc) << " " << txc->get_state_name() << dendl; switch (txc->state) { case TransContext::STATE_PREPARE: - if (!txc->aios.empty()) { + if (!txc->pending_aios.empty()) { txc->state = TransContext::STATE_AIO_WAIT; _txc_aio_submit(txc); return; @@ -2107,7 +2107,7 @@ void NewStore::_txc_state_proc(TransContext *txc) break; case TransContext::STATE_WAL_APPLYING: - if (!txc->aios.empty()) { + if (!txc->pending_aios.empty()) { txc->state = TransContext::STATE_WAL_AIO_WAIT; _txc_aio_submit(txc); return; @@ -2407,7 +2407,7 @@ int NewStore::_wal_apply(TransContext *txc) dout(20) << __func__ << " txc " << txc << " seq " << wt.seq << dendl; txc->state = TransContext::STATE_WAL_APPLYING; - txc->aios.clear(); + assert(txc->pending_aios.empty()); int r = _do_wal_transaction(wt, txc); assert(r == 0); @@ -2464,8 +2464,8 @@ int NewStore::_do_wal_transaction(wal_transaction_t& wt, return fd; #ifdef HAVE_LIBAIO if (g_conf->newstore_aio && txc && (flags & O_DIRECT)) { - txc->aios.push_back(FS::aio_t(txc, fd)); - FS::aio_t& aio = txc->aios.back(); + txc->pending_aios.push_back(FS::aio_t(txc, fd)); + FS::aio_t& aio = txc->pending_aios.back(); 
p->data.prepare_iov(&aio.iov); aio.pwritev(p->offset); dout(2) << __func__ << " prepared aio " << &aio << dendl; @@ -2642,13 +2642,19 @@ int NewStore::queue_transactions( void NewStore::_txc_aio_submit(TransContext *txc) { - int num = txc->aios.size(); + int num = txc->pending_aios.size(); dout(10) << __func__ << " txc " << txc << " submitting " << num << dendl; assert(num > 0); txc->num_aio.set(num); - for (list::iterator p = txc->aios.begin(); - p != txc->aios.end(); - ++p) { + + // move these aside, and get our end iterator position now, as the + // aios might complete as soon as they are submitted and queue more + // wal aio's. + list::iterator e = txc->submitted_aios.begin(); + txc->submitted_aios.splice(e, txc->pending_aios); + list::iterator p = txc->submitted_aios.begin(); + assert(p != e); + for (; p != e; ++p) { FS::aio_t& aio = *p; dout(20) << __func__ << " aio " << &aio << " fd " << aio.fd << dendl; for (vector::iterator q = aio.iov.begin(); q != aio.iov.end(); ++q) @@ -3257,8 +3263,8 @@ int NewStore::_do_write(TransContext *txc, } #ifdef HAVE_LIBAIO if (g_conf->newstore_aio && (flags & O_DIRECT)) { - txc->aios.push_back(FS::aio_t(txc, fd)); - FS::aio_t& aio = txc->aios.back(); + txc->pending_aios.push_back(FS::aio_t(txc, fd)); + FS::aio_t& aio = txc->pending_aios.back(); bl.prepare_iov(&aio.iov); txc->aio_bl.append(bl); aio.pwritev(x_offset); @@ -3305,8 +3311,8 @@ int NewStore::_do_write(TransContext *txc, #ifdef HAVE_LIBAIO if (g_conf->newstore_aio && (flags & O_DIRECT)) { - txc->aios.push_back(FS::aio_t(txc, fd)); - FS::aio_t& aio = txc->aios.back(); + txc->pending_aios.push_back(FS::aio_t(txc, fd)); + FS::aio_t& aio = txc->pending_aios.back(); bl.prepare_iov(&aio.iov); txc->aio_bl.append(bl); aio.pwritev(0); diff --git a/src/os/newstore/NewStore.h b/src/os/newstore/NewStore.h index 9f97122045fa..55b73f7fd0b9 100644 --- a/src/os/newstore/NewStore.h +++ b/src/os/newstore/NewStore.h @@ -200,7 +200,8 @@ public: wal_transaction_t *wal_txn; ///< wal 
transaction (if any) unsigned num_fsyncs_completed; - list aios; + list pending_aios; ///< not yet submitted + list submitted_aios; ///< submitting or submitted bufferlist aio_bl; // just a pile of refs atomic_t num_aio;