From 5497e2fa4ed8b5a20cd71ef2e88a119625ff42ab Mon Sep 17 00:00:00 2001 From: Haomai Wang Date: Mon, 18 Jan 2016 15:30:08 +0800 Subject: [PATCH] BlueStore: allow nvme driver read from path Signed-off-by: Haomai Wang --- src/common/config_opts.h | 11 +++++---- src/os/bluestore/BlockDevice.cc | 14 ++++++++++- src/os/bluestore/BlockDevice.h | 3 ++- src/os/bluestore/BlueFS.cc | 2 +- src/os/bluestore/BlueStore.cc | 43 ++++++++++++++++++++++++--------- src/os/bluestore/NVMEDevice.cc | 20 +++++++++++++-- src/os/bluestore/NVMEDevice.h | 2 ++ 7 files changed, 73 insertions(+), 22 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 95c1cf9650a8f..060b87d17e646 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -848,17 +848,12 @@ OPTION(memstore_device_bytes, OPT_U64, 1024*1024*1024) OPTION(memstore_page_set, OPT_BOOL, true) OPTION(memstore_page_size, OPT_U64, 64 << 10) -OPTION(bdev_backend_type, OPT_STR, "kernel") OPTION(bdev_debug_inflight_ios, OPT_BOOL, false) OPTION(bdev_inject_crash, OPT_INT, 0) // if N>0, then ~ 1/N IOs will complete before we crash on flush. OPTION(bdev_aio, OPT_BOOL, true) OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds OPTION(bdev_aio_max_queue_depth, OPT_INT, 32) -// Define the whitelist of NVMe controllers to initialize. -// Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to -// get the serial number of Intel(R) Fultondale NVMe controllers. -OPTION(bdev_nvme_serial_number, OPT_STR, "") // if yes, osd will unbind all NVMe devices from kernel driver and bind them // to the uio_pci_generic driver. The purpose is to prevent the case where // NVMe driver is loaded while osd is running. @@ -880,6 +875,12 @@ OPTION(bluestore_bluefs_min_ratio, OPT_FLOAT, .02) // min fs free / total free OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT, .90) // max fs free / total free OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT, .02) // how much to add at a time OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT, .20) // how much to reclaim at a time +// If you want to use spdk driver, you need to specify NVMe serial number here +// with "spdk:" prefix. +// Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to +// get the serial number of Intel(R) Fultondale NVMe controllers. +// Example: +// bluestore_block_path = spdk:55cd2e404bd73932 OPTION(bluestore_block_path, OPT_STR, "") OPTION(bluestore_block_size, OPT_U64, 10 * 1024*1024*1024) // 10gb for testing OPTION(bluestore_block_db_path, OPT_STR, "") diff --git a/src/os/bluestore/BlockDevice.cc b/src/os/bluestore/BlockDevice.cc index aed2b80a05eae..bb62faba4ad0a 100644 --- a/src/os/bluestore/BlockDevice.cc +++ b/src/os/bluestore/BlockDevice.cc @@ -14,6 +14,8 @@ * */ +#include + #include "KernelDevice.h" #if defined(HAVE_SPDK) #include "NVMEDevice.h" @@ -40,8 +42,16 @@ void IOContext::aio_wait() dout(20) << __func__ << " " << this << " done" << dendl; } -BlockDevice *BlockDevice::create(const string& type, aio_callback_t cb, void *cbpriv) +BlockDevice *BlockDevice::create(const string& path, aio_callback_t cb, void *cbpriv) { + char buf[2]; + int r = ::readlink(path.c_str(), buf, 2); + + string type = "kernel"; + if (r < 0) + type = "ust-nvme"; + dout(1) << __func__ << " path " << path << " type " << type << dendl; + if (type == "kernel") { return new KernelDevice(cb, cbpriv); } @@ -51,6 +61,8 @@ BlockDevice *BlockDevice::create(const string& type, aio_callback_t cb, void *cb } #endif + derr << __func__ << " unknown bacend " << type << dendl; + assert(0); return NULL; } diff --git a/src/os/bluestore/BlockDevice.h b/src/os/bluestore/BlockDevice.h index 47f7be54d689b..090fd1e48067f 100644 --- a/src/os/bluestore/BlockDevice.h +++ b/src/os/bluestore/BlockDevice.h @@ -68,7 +68,8 @@ public: typedef void (*aio_callback_t)(void *handle, void *aio); static BlockDevice *create( - const string& type, aio_callback_t cb, void *cbpriv); + const string& path, aio_callback_t cb, void *cbpriv); + virtual bool supported_bdev_label() { return true; } virtual void aio_submit(IOContext *ioc) = 0; diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index c9c27d3553818..adf512b69a50b 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -43,7 +43,7 @@ int BlueFS::add_block_device(unsigned id, string path) { dout(10) << __func__ << " bdev " << id << " path " << path << dendl; assert(id == bdev.size()); - BlockDevice *b = BlockDevice::create(g_conf->bdev_backend_type, NULL, NULL); //aio_cb, this); + BlockDevice *b = BlockDevice::create(path, NULL, NULL); //aio_cb, this); int r = b->open(path); if (r < 0) { delete b; diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index a2a355e796066..739f9cd47e73e 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -931,15 +931,17 @@ int BlueStore::_open_bdev(bool create) { bluestore_bdev_label_t label; assert(bdev == NULL); - bdev = BlockDevice::create(g_conf->bdev_backend_type, aio_cb, static_cast(this)); string p = path + "/block"; + bdev = BlockDevice::create(p, aio_cb, static_cast(this)); int r = bdev->open(p); if (r < 0) goto fail; - r = _check_or_set_bdev_label(p, bdev->get_size(), "main", create); - if (r < 0) - goto fail_close; + if (bdev->supported_bdev_label()) { + r = _check_or_set_bdev_label(p, bdev->get_size(), "main", create); + if (r < 0) + goto fail_close; + } return 0; fail_close: @@ -1550,17 +1552,34 @@ int BlueStore::_setup_block_symlink_or_file( { dout(20) << __func__ << " name " << name << " path " << path << " size " << size << dendl; + int r = 0; if (path.length()) { - int r = ::symlinkat(path.c_str(), path_fd, name.c_str()); - if (r < 0) { - r = -errno; - derr << __func__ << " failed to create " << name << " symlink to " - << path << ": " << cpp_strerror(r) << dendl; - return r; + string spdk_prefix = "spdk:"; + if (!path.compare(0, spdk_prefix.size(), spdk_prefix)) { + int fd = ::openat(path_fd, name.c_str(), O_CREAT|O_RDWR, 0644); + if (fd < 0) { + r = -errno; + derr << __func__ << " failed to create " << name << " file: " + << cpp_strerror(r) << dendl; + return r; + } + string serial_number = path.substr(spdk_prefix.size()); + r = ::write(fd, serial_number.c_str(), serial_number.size()); + assert(r == (int)serial_number.size()); + dout(1) << __func__ << " created " << name << " file with " << dendl; + VOID_TEMP_FAILURE_RETRY(::close(fd)); + } else { + r = ::symlinkat(path.c_str(), path_fd, name.c_str()); + if (r < 0) { + r = -errno; + derr << __func__ << " failed to create " << name << " symlink to " + << path << ": " << cpp_strerror(r) << dendl; + return r; + } } } else if (size) { struct stat st; - int r = ::fstatat(path_fd, name.c_str(), &st, 0); + r = ::fstatat(path_fd, name.c_str(), &st, 0); if (r < 0) r = -errno; if (r == -ENOENT) { @@ -1571,7 +1590,7 @@ int BlueStore::_setup_block_symlink_or_file( << cpp_strerror(r) << dendl; return r; } - int r = ::ftruncate(fd, size); + r = ::ftruncate(fd, size); assert(r == 0); dout(1) << __func__ << " created " << name << " file with size " << pretty_si_t(size) << "B" << dendl; diff --git a/src/os/bluestore/NVMEDevice.cc b/src/os/bluestore/NVMEDevice.cc index e7012f284f676..c2d9311998a0b 100644 --- a/src/os/bluestore/NVMEDevice.cc +++ b/src/os/bluestore/NVMEDevice.cc @@ -324,9 +324,25 @@ int NVMEDevice::open(string p) int r = 0; dout(1) << __func__ << " path " << p << dendl; - r = driver_data.try_get(g_conf->bdev_nvme_serial_number, &ctrlr, &name); + string serial_number; + int fd = ::open(p.c_str(), O_RDONLY); + if (fd < 0) { + r = -errno; + derr << __func__ << " unable to open " << p << ": " << cpp_strerror(r) + << dendl; + return r; + } + char buf[100]; + r = ::read(fd, buf, sizeof(buf)); + if (r <= 0) { + r = -errno; + derr << __func__ << " unable to read " << p << ": " << cpp_strerror(r) << dendl; + return r; + } + serial_number = string(buf, r); + r = driver_data.try_get(serial_number, &ctrlr, &name); if (r < 0) { - derr << __func__ << " failed to get nvme deivce with sn " << g_conf->bdev_nvme_serial_number << dendl; + derr << __func__ << " failed to get nvme deivce with sn " << serial_number << dendl; return r; } diff --git a/src/os/bluestore/NVMEDevice.h b/src/os/bluestore/NVMEDevice.h index 3825c590b6db4..9ef7053470691 100644 --- a/src/os/bluestore/NVMEDevice.h +++ b/src/os/bluestore/NVMEDevice.h @@ -106,6 +106,8 @@ class NVMEDevice : public BlockDevice { NVMEDevice(aio_callback_t cb, void *cbpriv); + bool supported_bdev_label() override { return false; } + void aio_submit(IOContext *ioc) override; uint64_t get_size() const override { -- 2.39.5