]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-client.git/commitdiff
btrfs: revalidate cached tree blocks on the uptodate path
author ZhengYuan Huang <gality369@gmail.com>
Fri, 13 Mar 2026 09:19:23 +0000 (17:19 +0800)
committer David Sterba <dsterba@suse.com>
Tue, 7 Apr 2026 16:56:02 +0000 (18:56 +0200)
read_extent_buffer_pages_nowait() returns immediately when an extent
buffer is already marked uptodate. On that cache-hit path, the
caller-supplied btrfs_tree_parent_check is not re-run.

This can let read_tree_root_path() accept a cached tree block whose
actual header level/owner does not match the expected value derived from
the parent.

For example, a corrupted root item may point to a tree block that does
not even belong to that root and has a mismatching level/owner.

If that tree block has already been read and cached, then when the
corrupted tree root is later read from disk, it hits the cached tree block.

Fix this by re-validating cached extent buffers against the supplied
btrfs_tree_parent_check on the uptodate path, and make
read_tree_root_path() pass its check to btrfs_buffer_uptodate().

This makes cache hits and fresh reads follow the same tree-parent
verification rules, and turns the corruption into a read failure instead
of constructing an inconsistent root object.

Signed-off-by: ZhengYuan Huang <gality369@gmail.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
[ Resolve the conflict with extent_buffer_uptodate() helper, handle
  transid mismatch case ]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/tree-log.c

index 71e7ada954777512ed015eb66a22cb9d83a123b1..cdd6c1422b53aa79eed109c16d249d059555b1fc 100644 (file)
@@ -1499,7 +1499,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
                        reada_for_search(fs_info, p, parent_level, slot, key->objectid);
 
                /* first we do an atomic uptodate check */
-               if (btrfs_buffer_uptodate(tmp, check.transid, true) > 0) {
+               if (btrfs_buffer_uptodate(tmp, check.transid, true, NULL) > 0) {
                        /*
                         * Do extra check for first_key, eb can be stale due to
                         * being cached, read from scrub, or have multiple
index 5b0348967bfcb036e4a8f8540e3b82685c63fe69..45e6dde6027445f8ceb75fa8d589bd7d2302282e 100644 (file)
@@ -109,13 +109,21 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
  * detect blocks that either didn't get written at all or got written
  * in the wrong place.
  */
-int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic)
+int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic,
+                         const struct btrfs_tree_parent_check *check)
 {
        if (!extent_buffer_uptodate(eb))
                return 0;
 
-       if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
+       if (!parent_transid || btrfs_header_generation(eb) == parent_transid) {
+               /*
+                * On a cache hit, the caller may still need tree parent
+                * verification before reusing the buffer.
+                */
+               if (unlikely(check && btrfs_verify_level_key(eb, check)))
+                       return -EUCLEAN;
                return 1;
+       }
 
        if (atomic)
                return -EAGAIN;
@@ -992,8 +1000,11 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
                root->node = NULL;
                goto fail;
        }
-       if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) {
-               ret = -EIO;
+
+       ret = btrfs_buffer_uptodate(root->node, generation, false, &check);
+       if (unlikely(ret <= 0)) {
+               if (ret == 0)
+                       ret = -EIO;
                goto fail;
        }
 
index 2742e6aac7dd58f75e90b50c4a98db188ddb4acf..343e332b17c0b6ae41e268a5b1d550d230823f16 100644 (file)
@@ -107,7 +107,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
 void btrfs_put_root(struct btrfs_root *root);
 void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
                             struct extent_buffer *buf);
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic,
+                         const struct btrfs_tree_parent_check *check);
 int btrfs_read_extent_buffer(struct extent_buffer *buf,
                             const struct btrfs_tree_parent_check *check);
 
index c341b97d54e9f563f4802cd1ca3aecfae91c5a12..5818ce1ae89d90630340ca0051ef75bd64ed9fcc 100644 (file)
@@ -5780,7 +5780,7 @@ static int check_next_block_uptodate(struct btrfs_trans_handle *trans,
 
        generation = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]);
 
-       if (btrfs_buffer_uptodate(next, generation, false))
+       if (btrfs_buffer_uptodate(next, generation, false, NULL))
                return 0;
 
        check.level = level - 1;
index 9648fbd2013788fb5b1e3d5834bd9733d7adbab7..f59d1f6aeaa2a5b1a90ff7fce69e19b815e09098 100644 (file)
@@ -3901,8 +3901,17 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
        struct btrfs_fs_info *fs_info = eb->fs_info;
        struct btrfs_bio *bbio;
 
-       if (extent_buffer_uptodate(eb))
+       if (extent_buffer_uptodate(eb)) {
+               int ret;
+
+               ret = btrfs_buffer_uptodate(eb, 0, true, check);
+               if (unlikely(ret <= 0)) {
+                       if (ret == 0)
+                               ret = -EIO;
+                       return ret;
+               }
                return 0;
+       }
 
        /*
         * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
@@ -3923,7 +3932,15 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
         * will now be set, and we shouldn't read it in again.
         */
        if (unlikely(extent_buffer_uptodate(eb))) {
+               int ret;
+
                clear_extent_buffer_reading(eb);
+               ret = btrfs_buffer_uptodate(eb, 0, true, check);
+               if (unlikely(ret <= 0)) {
+                       if (ret == 0)
+                               ret = -EIO;
+                       return ret;
+               }
                return 0;
        }
 
@@ -4636,7 +4653,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
        if (IS_ERR(eb))
                return;
 
-       if (btrfs_buffer_uptodate(eb, gen, true)) {
+       if (btrfs_buffer_uptodate(eb, gen, true, NULL)) {
                free_extent_buffer(eb);
                return;
        }
index a87fdfd465b31defdca5343d141e797a94942b88..6cf2828ba5001f89065d6ff8712fc714fb5800a2 100644 (file)
@@ -457,7 +457,7 @@ static int process_one_buffer(struct extent_buffer *eb,
                        return ret;
                }
 
-               if (btrfs_buffer_uptodate(eb, gen, false) && level == 0) {
+               if (btrfs_buffer_uptodate(eb, gen, false, NULL) && level == 0) {
                        ret = btrfs_exclude_logged_extents(eb);
                        if (ret)
                                btrfs_abort_transaction(trans, ret);