From: ZhengYuan Huang Date: Fri, 13 Mar 2026 09:19:23 +0000 (+0800) Subject: btrfs: revalidate cached tree blocks on the uptodate path X-Git-Tag: ceph-for-7.1-rc4~321^2~50 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f04c6475c2db778e7a9657a7f7b4a5033c933ff1;p=ceph-client.git btrfs: revalidate cached tree blocks on the uptodate path read_extent_buffer_pages_nowait() returns immediately when an extent buffer is already marked uptodate. On that cache-hit path, the caller-supplied btrfs_tree_parent_check is not re-run. This can let read_tree_root_path() accept a cached tree block whose actual header level/owner does not match the expected value derived from the parent. For example, a corrupted root item may point to a tree block which doesn't even belong to that root and has a mismatching level/owner. If that tree block has already been read and cached, then when the corrupted tree root is later read from disk, the read hits the cached tree block and the mismatch is not detected. Fix this by re-validating cached extent buffers against the supplied btrfs_tree_parent_check on the uptodate path, and make read_tree_root_path() pass its check to btrfs_buffer_uptodate(). This makes cache hits and fresh reads follow the same tree-parent verification rules, and turns the corruption into a read failure instead of constructing an inconsistent root object. 
Signed-off-by: ZhengYuan Huang Reviewed-by: Qu Wenruo [ Resolve the conflict with extent_buffer_uptodate() helper, handle transid mismatch case ] Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 71e7ada95477..cdd6c1422b53 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1499,7 +1499,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, reada_for_search(fs_info, p, parent_level, slot, key->objectid); /* first we do an atomic uptodate check */ - if (btrfs_buffer_uptodate(tmp, check.transid, true) > 0) { + if (btrfs_buffer_uptodate(tmp, check.transid, true, NULL) > 0) { /* * Do extra check for first_key, eb can be stale due to * being cached, read from scrub, or have multiple diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5b0348967bfc..45e6dde60274 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -109,13 +109,21 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) * detect blocks that either didn't get written at all or got written * in the wrong place. */ -int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic) +int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic, + const struct btrfs_tree_parent_check *check) { if (!extent_buffer_uptodate(eb)) return 0; - if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) { + /* + * On a cache hit, the caller may still need tree parent + * verification before reusing the buffer. 
+ */ + if (unlikely(check && btrfs_verify_level_key(eb, check))) + return -EUCLEAN; return 1; + } if (atomic) return -EAGAIN; @@ -992,8 +1000,11 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, root->node = NULL; goto fail; } - if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) { - ret = -EIO; + + ret = btrfs_buffer_uptodate(root->node, generation, false, &check); + if (unlikely(ret <= 0)) { + if (ret == 0) + ret = -EIO; goto fail; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2742e6aac7dd..343e332b17c0 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -107,7 +107,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root) void btrfs_put_root(struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, struct extent_buffer *buf); -int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic); +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic, + const struct btrfs_tree_parent_check *check); int btrfs_read_extent_buffer(struct extent_buffer *buf, const struct btrfs_tree_parent_check *check); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c341b97d54e9..5818ce1ae89d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5780,7 +5780,7 @@ static int check_next_block_uptodate(struct btrfs_trans_handle *trans, generation = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]); - if (btrfs_buffer_uptodate(next, generation, false)) + if (btrfs_buffer_uptodate(next, generation, false, NULL)) return 0; check.level = level - 1; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9648fbd20137..f59d1f6aeaa2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3901,8 +3901,17 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num, struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_bio *bbio; - if 
(extent_buffer_uptodate(eb)) + if (extent_buffer_uptodate(eb)) { + int ret; + + ret = btrfs_buffer_uptodate(eb, 0, true, check); + if (unlikely(ret <= 0)) { + if (ret == 0) + ret = -EIO; + return ret; + } return 0; + } /* * We could have had EXTENT_BUFFER_UPTODATE cleared by the write @@ -3923,7 +3932,15 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num, * will now be set, and we shouldn't read it in again. */ if (unlikely(extent_buffer_uptodate(eb))) { + int ret; + clear_extent_buffer_reading(eb); + ret = btrfs_buffer_uptodate(eb, 0, true, check); + if (unlikely(ret <= 0)) { + if (ret == 0) + ret = -EIO; + return ret; + } return 0; } @@ -4636,7 +4653,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, if (IS_ERR(eb)) return; - if (btrfs_buffer_uptodate(eb, gen, true)) { + if (btrfs_buffer_uptodate(eb, gen, true, NULL)) { free_extent_buffer(eb); return; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a87fdfd465b3..6cf2828ba500 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -457,7 +457,7 @@ static int process_one_buffer(struct extent_buffer *eb, return ret; } - if (btrfs_buffer_uptodate(eb, gen, false) && level == 0) { + if (btrfs_buffer_uptodate(eb, gen, false, NULL) && level == 0) { ret = btrfs_exclude_logged_extents(eb); if (ret) btrfs_abort_transaction(trans, ret);