From d6c84d2d0f3a52d79461b2a1b07b4c5704b864e8 Mon Sep 17 00:00:00 2001 From: Mark Nelson Date: Fri, 30 Apr 2021 15:28:18 +0000 Subject: [PATCH] src/common: Update bluefs_buffered_io description. Signed-off-by: Mark Nelson --- src/common/options/global.yaml.in | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in index 459740da59ff..8d1b93b8b74f 100644 --- a/src/common/options/global.yaml.in +++ b/src/common/options/global.yaml.in @@ -6547,9 +6547,15 @@ options: desc: Enabled buffered IO for bluefs reads. long_desc: When this option is enabled, bluefs will in some cases perform buffered reads. This allows the kernel page cache to act as a secondary cache for things - like RocksDB compaction. For example, if the rocksdb block cache isn't large - enough to hold blocks from the compressed SST files itself, they can be read from - page cache instead of from the disk. + like RocksDB block reads. For example, if the rocksdb block cache isn't large + enough to hold all blocks during OMAP iteration, it may be possible to read them + from page cache instead of from the disk. This can dramatically improve + performance when the osd_memory_target is too small to hold all entries in block + cache, but it does come with downsides. It has been reported to occasionally + cause excessive kernel swapping (and associated stalls) under certain workloads. + Currently the best and most consistent performing combination appears to be + enabling bluefs_buffered_io and disabling system level swap. It is possible + that this recommendation may change in the future, however. default: true with_legacy: true - name: bluefs_sync_write -- 2.47.3