From: Curt Bruns Date: Fri, 15 Oct 2021 23:42:25 +0000 (-0400) Subject: os/bluestore: Set min_alloc_size to optimal io size X-Git-Tag: v17.1.0~313^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c1f8bdc0f0a4c7aed3bb60bb3c11622f270291dd;p=ceph.git os/bluestore: Set min_alloc_size to optimal io size Block devices may report an "optimal_io_size" that is different than the typical 4KiB. To optimize BlueStore for this io size, the allocator needs to set its min_alloc_size to this optimal_io_size. This PR adds the discovery of the optimal_io_size for a block device and an option to use the optimal_io_size as the min_alloc_size for the bluestore allocator. Older devices may report an optimal_io_size of 0 and if that is the case, the default config min_alloc_size is used. Signed-off-by: Curt Bruns --- diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 01fa0d445120..4eef1ef0df4c 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -166,6 +166,7 @@ private: protected: uint64_t size = 0; uint64_t block_size = 0; + uint64_t optimal_io_size = 0; bool support_discard = false; bool rotational = true; bool lock_exclusive = true; @@ -224,6 +225,7 @@ public: uint64_t get_size() const { return size; } uint64_t get_block_size() const { return block_size; } + uint64_t get_optimal_io_size() const { return optimal_io_size; } /// hook to provide utilization of thinly-provisioned device virtual bool get_thin_utilization(uint64_t *total, uint64_t *avail) const { diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index f64f57c9cee3..ebd387aa17e9 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -233,6 +233,7 @@ int KernelDevice::open(const string& p) dout(20) << __func__ << " devname " << devname << dendl; rotational = blkdev_buffered.is_rotational(); support_discard = blkdev_buffered.support_discard(); + optimal_io_size = blkdev_buffered.get_optimal_io_size(); this->devname = devname; _detect_vdo(); } @@ -320,6 +321,7 @@ int KernelDevice::collect_metadata(const string& prefix, map *pm) (*pm)[prefix + "rotational"] = stringify((int)(bool)rotational); (*pm)[prefix + "size"] = stringify(get_size()); (*pm)[prefix + "block_size"] = stringify(get_block_size()); + (*pm)[prefix + "optimal_io_size"] = stringify(get_optimal_io_size()); (*pm)[prefix + "driver"] = "KernelDevice"; if (rotational) { (*pm)[prefix + "type"] = "hdd"; diff --git a/src/common/blkdev.cc b/src/common/blkdev.cc index ea81bf0ae1e9..e9d3cc1c381d 100644 --- a/src/common/blkdev.cc +++ b/src/common/blkdev.cc @@ -217,6 +217,11 @@ int BlkDev::discard(int64_t offset, int64_t len) const return ioctl(fd, BLKDISCARD, range); } +int BlkDev::get_optimal_io_size() const +{ + return get_int_property("queue/optimal_io_size"); +} + bool BlkDev::is_rotational() const { return get_int_property("queue/rotational") > 0; diff --git a/src/common/blkdev.h b/src/common/blkdev.h index f8b089b0ca7c..ed9da450e0f2 100644 --- a/src/common/blkdev.h +++ b/src/common/blkdev.h @@ -56,6 +56,7 @@ public: int partition(char* partition, size_t max) const; // from a device (e.g., "sdb") bool support_discard() const; + int get_optimal_io_size() const; bool is_rotational() const; int get_numa_node(int *node) const; int dev(char *dev, size_t max) const; diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in index a0fcb61b5c31..04552bf2ea6c 100644 --- a/src/common/options/global.yaml.in +++ b/src/common/options/global.yaml.in @@ -4287,6 +4287,16 @@ options: flags: - create with_legacy: true +- name: bluestore_use_optimal_io_size_for_min_alloc_size + type: bool + level: advanced + desc: Discover media optimal IO Size and use for min_alloc_size + default: false + see_also: + - bluestore_min_alloc_size + flags: + - create + with_legacy: true - name: bluestore_max_alloc_size type: size level: advanced diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 0a215eefefd8..045941b280fe 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5376,6 +5376,8 @@ int BlueStore::_open_bdev(bool create) if (r < 0) { goto fail_close; } + // get block dev optimal io size + optimal_io_size = bdev->get_optimal_io_size(); return 0; @@ -6859,7 +6861,14 @@ int BlueStore::mkfs() dout(10) << " freelist_type " << freelist_type << dendl; // choose min_alloc_size - if (cct->_conf->bluestore_min_alloc_size) { + dout(5) << __func__ << " optimal_io_size 0x" << std::hex << optimal_io_size + << " block_size: 0x" << block_size << std::dec << dendl; + if ((cct->_conf->bluestore_use_optimal_io_size_for_min_alloc_size) && (optimal_io_size != 0)) { + dout(5) << __func__ << " optimal_io_size 0x" << std::hex << optimal_io_size + << " for min_alloc_size 0x" << min_alloc_size << std::dec << dendl; + min_alloc_size = optimal_io_size; + } + else if (cct->_conf->bluestore_min_alloc_size) { min_alloc_size = cct->_conf->bluestore_min_alloc_size; } else { ceph_assert(bdev); @@ -6881,6 +6890,16 @@ int BlueStore::mkfs() goto out_close_bdev; } + // make sure min_alloc_size is >= and aligned with block size + if (min_alloc_size % block_size != 0) { + derr << __func__ << " min_alloc_size 0x" + << std::hex << min_alloc_size + << " is less or not aligned with block_size: 0x" + << block_size << std::dec << dendl; + r = -EINVAL; + goto out_close_bdev; + } + r = _create_alloc(); if (r < 0) { goto out_close_bdev; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 0a157edce116..b2edf00e3620 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2124,6 +2124,7 @@ private: uint64_t block_size = 0; ///< block size of block device (power of 2) uint64_t block_mask = 0; ///< mask to get just the block offset size_t block_size_order = 0; ///< bits to shift to get block size + uint64_t optimal_io_size = 0;///< best performance io size for block device uint64_t min_alloc_size; ///< minimum allocation unit (power of 2) ///< bits for min_alloc_size