From: chunmei Liu Date: Sat, 16 Dec 2017 18:45:32 +0000 (-0800) Subject: osd: fix dpdk runtime issue based on spdk/dpdk libarary X-Git-Tag: v13.0.2~654^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c9051fbad38f3ce31cad41e3cadb9447e395630a;p=ceph.git osd: fix dpdk runtime issue based on spdk/dpdk libarary 1.misunderstand mbuf_overhead before, so correct it. 2.spdk/dpdk library will set refcnt, so needn't set it here. 3.avoid dpdk library modification. when create memory pool set elt_size as mbuf_overhead + mbuf_data_size to avoid dpdk library check size assert. 4. call rte_pktmbuf_prefree_seg to set mbuf->next = null avoid dpdk/lib modification . 5. use memzon to allocate mbuf data part, so these data buf can be processes the same way as mbuf allocated by mempool create. Signed-off-by: chunmei Liu --- diff --git a/src/msg/async/dpdk/DPDK.cc b/src/msg/async/dpdk/DPDK.cc index 42da69af8444..e7a4ceddafed 100644 --- a/src/msg/async/dpdk/DPDK.cc +++ b/src/msg/async/dpdk/DPDK.cc @@ -98,7 +98,7 @@ static constexpr uint16_t mbuf_cache_size = 512; static constexpr size_t mbuf_data_size = 4096; static constexpr uint16_t mbuf_overhead = -sizeof(struct rte_mbuf) + mbuf_data_size + RTE_PKTMBUF_HEADROOM; + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM; // // We'll allocate 2K data buffers for an inline case because this would require // a single page per mbuf. If we used 4K data buffers here it would require 2 @@ -496,6 +496,9 @@ bool DPDKQueuePair::init_rx_mbuf_pool() { std::string name = std::string(pktmbuf_pool_name) + std::to_string(_qid) + "_rx"; + int bufs_count = cct->_conf->ms_dpdk_rx_buffer_count_per_core - mbufs_per_queue_rx; + int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + char mz_name[RTE_MEMZONE_NAMESIZE]; // reserve the memory for Rx buffers containers _rx_free_pkts.reserve(mbufs_per_queue_rx); _rx_free_bufs.reserve(mbufs_per_queue_rx); @@ -513,7 +516,7 @@ bool DPDKQueuePair::init_rx_mbuf_pool() roomsz.mbuf_data_room_size = mbuf_data_size + RTE_PKTMBUF_HEADROOM; _pktmbuf_pool_rx = rte_mempool_create( name.c_str(), - mbufs_per_queue_rx, mbuf_overhead, + mbufs_per_queue_rx, mbuf_overhead + mbuf_data_size, mbuf_cache_size, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, as_cookie(roomsz), @@ -529,31 +532,21 @@ bool DPDKQueuePair::init_rx_mbuf_pool() // 2) Bind data buffers to each of them. // 3) Return them back to the pool. // - for (int i = 0; i < mbufs_per_queue_rx; i++) { - rte_mbuf* m = rte_pktmbuf_alloc(_pktmbuf_pool_rx); - assert(m); - rte_mbuf_refcnt_set(m, 1); - _rx_free_bufs.push_back(m); + int ret = snprintf(mz_name, sizeof(mz_name), + "%s", "rx_buffer_data" + std::to_string(_qid)); + if (ret < 0 || ret >= (int)sizeof(mz_name)) { + return false; } - - for (int i = 0; i < cct->_conf->ms_dpdk_rx_buffer_count_per_core; i++) { - void* m = rte_malloc(NULL, mbuf_data_size, mbuf_data_size); + const struct rte_memzone *mz = rte_memzone_reserve_aligned(mz_name, mbuf_data_size*bufs_count, + _pktmbuf_pool_rx->socket_id, mz_flags, mbuf_data_size); + assert(mz); + void* m = mz->addr; + for (int i = 0; i < bufs_count; i++) { assert(m); _alloc_bufs.push_back(m); + m += mbuf_data_size; } - for (auto&& m : _rx_free_bufs) { - if (!init_noninline_rx_mbuf(m, mbuf_data_size, _alloc_bufs)) { - lderr(cct) << __func__ << " Failed to allocate data buffers for Rx ring. " - "Consider increasing the amount of memory." << dendl; - return false; - } - } - - rte_mempool_put_bulk(_pktmbuf_pool_rx, (void**)_rx_free_bufs.data(), - _rx_free_bufs.size()); - - _rx_free_bufs.clear(); if (rte_eth_rx_queue_setup(_dev_port_idx, _qid, default_ring_size, rte_eth_dev_socket_id(_dev_port_idx), _dev->def_rx_conf(), _pktmbuf_pool_rx) < 0) { @@ -800,6 +793,9 @@ bool DPDKQueuePair::rx_gc(bool force) break; } } + for (auto&& m : _rx_free_bufs) { + rte_pktmbuf_prefree_seg(m); + } if (_rx_free_bufs.size()) { rte_mempool_put_bulk(_pktmbuf_pool_rx, diff --git a/src/msg/async/dpdk/DPDK.h b/src/msg/async/dpdk/DPDK.h index 3c5c924eaa30..bf87c7be7e3f 100644 --- a/src/msg/async/dpdk/DPDK.h +++ b/src/msg/async/dpdk/DPDK.h @@ -626,18 +626,7 @@ class DPDKQueuePair { // actual data buffer. // m->buf_addr = (char*)data - RTE_PKTMBUF_HEADROOM; - m->buf_physaddr = rte_malloc_virt2iova(data) - RTE_PKTMBUF_HEADROOM; - return true; - } - - static bool init_noninline_rx_mbuf(rte_mbuf* m, size_t size, - std::vector &datas) { - if (!refill_rx_mbuf(m, size, datas)) { - return false; - } - // The below fields stay constant during the execution. - m->buf_len = size + RTE_PKTMBUF_HEADROOM; - m->data_off = RTE_PKTMBUF_HEADROOM; + m->buf_physaddr = rte_mem_virt2phy(data) - RTE_PKTMBUF_HEADROOM; return true; }