From b16296911b0bb554fa5a9966d4dbf285f2b590fa Mon Sep 17 00:00:00 2001 From: Ziye Yang Date: Thu, 24 Mar 2022 01:34:36 +0800 Subject: [PATCH] blk/pmem: Add the devdax support. The purpose is to make the pmem device usage more flexible than the current solution. And prepare for the potential offloading by hardware engine later. Signed-off-by: Ziye Yang --- src/blk/pmem/PMEMDevice.cc | 94 ++++++++++++++++++++++++++++++++++++-- src/blk/pmem/PMEMDevice.h | 3 +- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/src/blk/pmem/PMEMDevice.cc b/src/blk/pmem/PMEMDevice.cc index 7b0be249b4cff..5b4745e5e5977 100644 --- a/src/blk/pmem/PMEMDevice.cc +++ b/src/blk/pmem/PMEMDevice.cc @@ -18,6 +18,11 @@ #include #include #include +#include +#include +#include +#include +#include #include "PMEMDevice.h" #include "libpmem.h" @@ -54,6 +59,55 @@ int PMEMDevice::_lock() return 0; } +static int pmem_check_file_type(int fd, const char *pmem_file, uint64_t *total_size) +{ + int rc = 0; + struct stat file_stat; + + rc = ::fstat(fd, &file_stat); + if (rc) { + return -1; + } + + if ((file_stat.st_mode & S_IFCHR) != S_IFCHR) { + return -1; + } + + char spath[PATH_MAX], npath[PATH_MAX]; + snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem", + major(file_stat.st_rdev), minor(file_stat.st_rdev)); + + char *real_path = realpath(spath, npath); + if (!real_path) { + return -1; + } + + // Need to check if it is a DAX device + char *base_name = strrchr(real_path, '/'); + if (!base_name || strcmp("dax", base_name + 1)) { + return -1; + } + + snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size", + major(file_stat.st_rdev), minor(file_stat.st_rdev)); + FILE *sfile = fopen(spath, "r"); + if (!sfile) { + return -1; + } + + if (total_size != nullptr) { + rc = fscanf(sfile, "%lu", total_size); + if (rc < 0) { + rc = -1; + } else { + rc = 0; + } + } + + fclose(sfile); + return rc; +} + int PMEMDevice::open(const std::string& p) { path = p; @@ -67,6 +121,14 @@ int PMEMDevice::open(const std::string& p) return r; } + r = pmem_check_file_type(fd, path.c_str(), &size); + if (!r) { + dout(1) << __func__ << " This path " << path << " is a devdax dev " << dendl; + devdax_device = true; + // If using devdax char device, set it to not rotational device. + rotational = false; + } + r = _lock(); if (r < 0) { derr << __func__ << " failed to lock " << path << ": " << cpp_strerror(r) @@ -83,7 +145,9 @@ int PMEMDevice::open(const std::string& p) } size_t map_len; - addr = (char *)pmem_map_file(path.c_str(), 0, PMEM_FILE_EXCL, O_RDWR, &map_len, NULL); + addr = (char *)pmem_map_file(path.c_str(), 0, + devdax_device ? 0: PMEM_FILE_EXCL, O_RDWR, + &map_len, NULL); if (addr == NULL) { derr << __func__ << " pmem_map_file failed: " << pmem_errormsg() << dendl; goto out_fail; @@ -120,7 +184,11 @@ void PMEMDevice::close() dout(1) << __func__ << dendl; ceph_assert(addr != NULL); + if (devdax_device) { + devdax_device = false; + } pmem_unmap(addr, size); + ceph_assert(fd >= 0); VOID_TEMP_FAILURE_RETRY(::close(fd)); fd = -1; @@ -157,6 +225,10 @@ int PMEMDevice::collect_metadata(const std::string& prefix, std::map debug_inflight; @@ -40,7 +41,7 @@ class PMEMDevice : public BlockDevice { public: PMEMDevice(CephContext *cct, aio_callback_t cb, void *cbpriv); - + bool supported_bdev_label() override { return !devdax_device; } void aio_submit(IOContext *ioc) override; int collect_metadata(const std::string& prefix, std::map *pm) const override; -- 2.39.5