static constexpr uint16_t inline_segment_num = 32;
+/* Default to 10 seconds for the keep alive value. This value is arbitrary. */
+static constexpr uint32_t nvme_ctrlr_keep_alive_timeout_in_ms = 10000;
+
static void io_complete(void *t, const struct spdk_nvme_cpl *completion);
struct IORequest {
spdk_nvme_ns *ns;
uint32_t block_size = 0;
uint64_t size = 0;
+ std::thread admin_thread;
public:
std::vector<NVMEDevice*> registered_devices;
ns(ns_) {
block_size = spdk_nvme_ns_get_extended_sector_size(ns);
size = spdk_nvme_ns_get_size(ns);
+ if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ return;
+ }
+
+ // For Non-PCIe transport, we need to send keep-alive periodically.
+ admin_thread = std::thread(
+ [this]() {
+ int rc;
+ while (true) {
+ rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr);
+ ceph_assert(rc >= 0);
+ sleep(1);
+ }
+ }
+ );
}
bool is_equal(const spdk_nvme_transport_id& trid2) const {
return spdk_nvme_transport_id_compare(&trid, &trid2) == 0;
}
~SharedDriverData() {
+ if (admin_thread.joinable()) { // only joinable when an admin thread was actually started (the PCIe path returns before creating one)
+ admin_thread.join(); // NOTE(review): the thread body assigned to admin_thread loops on while (true) with no visible exit path, so this join looks like it can never return -- confirm and add a stop condition
+ }
}
void register_device(NVMEDevice *device) {
struct spdk_nvme_io_qpair_opts opts = {};
spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
opts.qprio = SPDK_NVME_QPRIO_URGENT;
- // usable queue depth should minus 1 to aovid overflow.
+ // usable queue depth is one less than io_queue_size, to avoid overflow.
max_queue_depth = opts.io_queue_size - 1;
qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
ceph_assert(qpair != NULL);
static bool probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, struct spdk_nvme_ctrlr_opts *opts)
{
NVMEManager::ProbeContext *ctx = static_cast<NVMEManager::ProbeContext*>(cb_ctx);
+ bool do_attach = false;
- if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) {
- dout(0) << __func__ << " only probe local nvme device" << dendl;
- return false;
+ if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ do_attach = spdk_nvme_transport_id_compare(&ctx->trid, trid) == 0;
+ if (!do_attach) {
+ dout(0) << __func__ << " device traddr (" << ctx->trid.traddr
+ << ") not match " << trid->traddr << dendl;
+ }
+ } else {
+ // for non-pcie devices, should always match the specified trid
+ assert(!spdk_nvme_transport_id_compare(&ctx->trid, trid));
+ do_attach = true;
}
- dout(0) << __func__ << " found device at: "
- << "trtype=" << spdk_nvme_transport_id_trtype_str(trid->trtype) << ", "
- << "traddr=" << trid->traddr << dendl;
- if (spdk_nvme_transport_id_compare(&ctx->trid, trid)) {
- dout(0) << __func__ << " device traddr (" << ctx->trid.traddr << ") not match " << trid->traddr << dendl;
- return false;
- }
+ if (do_attach) {
+ dout(0) << __func__ << " found device at: "
+ << "trtype=" << spdk_nvme_transport_id_trtype_str(trid->trtype) << ", "
+ << "traddr=" << trid->traddr << dendl;
- opts->io_queue_size = UINT16_MAX;
+ opts->io_queue_size = UINT16_MAX;
+ opts->io_queue_requests = UINT16_MAX;
+ opts->keep_alive_timeout_ms = nvme_ctrlr_keep_alive_timeout_in_ms;
+ }
- return true;
+ return do_attach;
}
static void attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
}
}
- struct spdk_pci_addr pci_addr;
- int rc = spdk_pci_addr_parse(&pci_addr, trid.traddr);
- if (rc < 0) {
- derr << __func__ << " invalid transport address: " << trid.traddr << dendl;
- return -ENOENT;
- }
auto coremask_arg = g_conf().get_val<std::string>("bluestore_spdk_coremask");
int m_core_arg = find_first_bitset(coremask_arg);
// at least one core is needed for using spdk
if (!dpdk_thread.joinable()) {
dpdk_thread = std::thread(
- [this, coremask_arg, m_core_arg, mem_size_arg, pci_addr]() {
+ [this, coremask_arg, m_core_arg, mem_size_arg, trid]() {
struct spdk_env_opts opts;
- struct spdk_pci_addr addr = pci_addr;
+ struct spdk_pci_addr addr;
int r;
+ bool local_pci_device = false;
+
spdk_env_opts_init(&opts);
opts.name = "nvme-device-manager";
opts.core_mask = coremask_arg.c_str();
opts.master_core = m_core_arg;
opts.mem_size = mem_size_arg;
- opts.pci_whitelist = &addr;
- opts.num_pci_addr = 1;
+ // spdk_env_opts_init() (re)initializes opts to its defaults, so the PCI
+ // whitelist must be filled in after it has run or the setting is lost.
+ // A traddr that parses as a PCI address means a local PCIe device; anything
+ // else is left to be probed through its transport id.
+ int rc = spdk_pci_addr_parse(&addr, trid.traddr);
+ if (!rc) {
+ local_pci_device = true;
+ opts.pci_whitelist = &addr;
+ opts.num_pci_addr = 1;
+ }
spdk_env_init(&opts);
spdk_unaffinitize_thread();
if (!probe_queue.empty()) {
ProbeContext* ctxt = probe_queue.front();
probe_queue.pop_front();
- r = spdk_nvme_probe(NULL, ctxt, probe_cb, attach_cb, NULL);
+ r = spdk_nvme_probe(local_pci_device ? NULL : &trid, ctxt, probe_cb, attach_cb, NULL);
if (r < 0) {
ceph_assert(!ctxt->driver);
derr << __func__ << " device probe nvme failed" << dendl;
return r;
}
if (int r = manager.try_get(trid, &driver); r < 0) {
- derr << __func__ << " failed to get nvme device with transport address " << trid.traddr << dendl;
+ derr << __func__ << " failed to get nvme device with transport address "
+ << trid.traddr << " type " << trid.trtype << dendl;
return r;
}