From cc38d178ff33543cdb0bd58cfbb9a7c41372ff75 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Apr 2025 18:28:02 +0930 Subject: [PATCH] btrfs: enable large data folio support under CONFIG_BTRFS_EXPERIMENTAL With all the preparation patches already merged, it's pretty easy to enable large data folios: - Remove the ASSERT() on folio size in btrfs_end_repair_bio() - Add a helper to properly set the max folio order Currently, because several call sites fetch the bitmap content directly into an unsigned long, we can only support BITS_PER_LONG blocks for each bitmap. - Call the helper when reading/creating an inode The support has the following limitations: - No large folios for data reloc inode The relocation code still requires page sized folios. But it's neither that hot nor that common compared to regular buffered IO. Will be improved in the future. - Requires CONFIG_BTRFS_EXPERIMENTAL - Will require all folio related operations to check if it needs the extra btrfs_subpage structure Now any folio larger than block size will need btrfs_subpage structure handling. Unfortunately I do not have a physical machine for performance testing, but if everything goes like XFS/EXT4, it should mostly bring a single-digit percentage performance improvement in the real world. Although I believe there are still quite a few optimizations to be done, let's focus on testing the current large data folio support first. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/Kconfig | 2 ++ fs/btrfs/bio.c | 6 ------ fs/btrfs/btrfs_inode.h | 17 +++++++++++++++++ fs/btrfs/inode.c | 2 ++ fs/btrfs/subpage.h | 14 -------------- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index c352f3ae0385c..ea95c90c84748 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -114,6 +114,8 @@ config BTRFS_EXPERIMENTAL - extent tree v2 - complex rework of extent tracking + - large folio support + If unsure, say N. 
config BTRFS_FS_REF_VERIFY diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index e7d436c6aec2b..00d274ed2b1fa 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -165,12 +165,6 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio, struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio); int mirror = repair_bbio->mirror_num; - /* - * We can only trigger this for data bio, which doesn't support larger - * folios yet. - */ - ASSERT(folio_order(page_folio(bv->bv_page)) == 0); - if (repair_bbio->bio.bi_status || !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) { bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a79fa0726f1d9..7545a1fa59979 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -525,6 +525,23 @@ static inline void btrfs_update_inode_mapping_flags(struct btrfs_inode *inode) mapping_set_stable_writes(inode->vfs_inode.i_mapping); } +static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode) +{ + /* Metadata inode should not reach here. */ + ASSERT(is_data_inode(inode)); + + /* For data reloc inode, it still requires page sized folio. */ + if (unlikely(btrfs_is_data_reloc_root(inode->root))) + return; + + /* We only allow BITS_PER_LONG blocks for each bitmap. 
*/ +#ifdef CONFIG_BTRFS_EXPERIMENTAL + mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 0, + ilog2(((BITS_PER_LONG << inode->root->fs_info->sectorsize_bits) + >> PAGE_SHIFT))); +#endif +} + /* Array of bytes with variable length, hexadecimal format 0x1234 */ #define CSUM_FMT "0x%*phN" #define CSUM_FMT_VALUE(size, bytes) size, bytes diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cbc95bceb7ff1..b66eee5ab5d85 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3946,6 +3946,7 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item), &inode->flags, &inode->ro_flags); btrfs_update_inode_mapping_flags(inode); + btrfs_set_inode_mapping_order(inode); cache_index: /* @@ -6463,6 +6464,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM; btrfs_update_inode_mapping_flags(BTRFS_I(inode)); + btrfs_set_inode_mapping_order(BTRFS_I(inode)); } ret = btrfs_insert_inode_locked(inode); diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index b6e40a678d738..ee0710eb13fd0 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -92,7 +92,6 @@ enum btrfs_folio_type { BTRFS_SUBPAGE_DATA, }; -#if PAGE_SIZE > BTRFS_MIN_BLOCKSIZE /* * Subpage support for metadata is more complex, as we can have dummy extent * buffers, where folios have no mapping to determine the owning inode. 
@@ -113,19 +112,6 @@ static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, ASSERT(is_data_inode(BTRFS_I(folio->mapping->host))); return fs_info->sectorsize < folio_size(folio); } -#else -static inline bool btrfs_meta_is_subpage(const struct btrfs_fs_info *fs_info) -{ - return false; -} -static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, - struct folio *folio) -{ - if (folio->mapping && folio->mapping->host) - ASSERT(is_data_inode(BTRFS_I(folio->mapping->host))); - return false; -} -#endif int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio, enum btrfs_folio_type type); -- 2.39.5