]> git-server-git.apps.pok.os.sepia.ceph.com Git - xfstests-dev.git/commitdiff
fstests: generic/746: update the parser to handle block group tree
authorQu Wenruo <wqu@suse.com>
Fri, 5 Dec 2025 07:17:26 +0000 (17:47 +1030)
committerZorro Lang <zlang@kernel.org>
Thu, 1 Jan 2026 13:14:26 +0000 (21:14 +0800)
[FALSE ALERT]
The test case will fail on btrfs if the new block-group-tree feature is
enabled:

FSTYP         -- btrfs
PLATFORM      -- Linux/x86_64 btrfs-vm 6.18.0-rc6-custom+ #321 SMP PREEMPT_DYNAMIC Sun Nov 23 16:34:33 ACDT 2025
MKFS_OPTIONS  -- -O block-group-tree /dev/mapper/test-scratch1
MOUNT_OPTIONS -- /dev/mapper/test-scratch1 /mnt/scratch

generic/746 44s ... [failed, exit status 1]- output mismatch (see xfstests-dev/results//generic/746.out.bad)
    --- tests/generic/746.out 2024-06-27 13:55:51.286338519 +0930
    +++ xfstests-dev/results//generic/746.out.bad 2025-11-28 07:47:17.039827837 +1030
    @@ -2,4 +2,4 @@
     Generating garbage on loop...done.
     Running fstrim...done.
     Detecting interesting holes in image...done.
    -Comparing holes to the reported space from FS...done.
    +Comparing holes to the reported space from FS...Sectors 256-2111 are not marked as free!
    ...
    (Run 'diff -u xfstests-dev/tests/generic/746.out xfstests-dev/results//generic/746.out.bad'  to see the entire diff)

[CAUSE]
Sectors [256, 2048) are the from the reserved first 1M free space.
Sectors [2048, 2112) are the leading free space in the chunk tree.
Sectors [2112, 2144) is the first tree block in the chunk tree.

However the reported free sectors from get_free_sectors() looks like this:

  2144 10566
  10688 11711
  ...

Note that there should be a free sector range in [2048, 2112) but it's
not reported in get_free_sectors().

The get_free_sectors() call is fs dependent, and for btrfs it's using
parse-extent-tree.awk script to handle the extent tree dump.

The script uses BLOCK_GROUP_ITEM items to detect the beginning of a
block group so that it can calculate the hole between the beginning of a
block group and the first data/metadata item.

However block-group-tree feature moves BLOCK_GROUP_ITEM items to a
dedicated tree, making the existing script unable to parse the free
space at the beginning of a block group.

[FIX]
Introduce a new script, parse-free-space.py, that accepts two tree
dumps:

- block group tree dump
  If the fs has block-group-tree feature, it's the block group tree
  dump.
  Otherwise the regular extent tree dump is enough.

- extent tree dump
  The usual extent tree dump.

With a dedicated block group tree dump, the script can correctly handle
the beginning part of free space, no matter if block-group-tree feature
is enabled or not.

And with this parser, the old parse-extent-tree.awk can be retired.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Reviewed-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Zorro Lang <zlang@kernel.org>
src/Makefile
src/parse-extent-tree.awk [deleted file]
src/parse-free-space.py [new file with mode: 0755]
tests/generic/746

index 711dbb917b3a8339f54d397c289e26f21ecba4cf..d0a4106e6be81b61a3ef75efa8825569f780a693 100644 (file)
@@ -40,7 +40,7 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 
 EXTRA_EXECS = dmerror fill2attr fill2fs fill2fs_check scaleread.sh \
              btrfs_crc32c_forged_name.py popdir.pl popattr.py \
-             soak_duration.awk parse-dev-tree.awk parse-extent-tree.awk
+             soak_duration.awk parse-dev-tree.awk parse-free-space.py
 
 SUBDIRS = log-writes perf
 
diff --git a/src/parse-extent-tree.awk b/src/parse-extent-tree.awk
deleted file mode 100755 (executable)
index 1e69693..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2019 Nikolay Borisov, SUSE LLC.  All Rights Reserved.
-#
-# Parses btrfs' extent tree for holes. Holes are the ranges between 2 adjacent
-# extent blocks. For example if we have the following 2 metadata items in the
-# extent tree:
-#      item 6 key (30425088 METADATA_ITEM 0) itemoff 16019 itemsize 33
-#      item 7 key (30490624 METADATA_ITEM 0) itemoff 15986 itemsize 33
-#
-# There is a whole of 64k between then - 30490624−30425088 = 65536
-# Same logic applies for adjacent EXTENT_ITEMS.
-#
-# The script requires the following parameters passed on command line:
-#     * sectorsize - how many bytes per sector, used to convert the output of
-#     the script to sectors.
-#     * nodesize - size of metadata extents, used for internal calculations
-
-# Given an extent line "item 2 key (13672448 EXTENT_ITEM 65536) itemoff 16153 itemsize 53"
-# or "item 6 key (30425088 METADATA_ITEM 0) itemoff 16019 itemsize 33" returns
-# either 65536 (for data extents) or the fixes nodesize value for metadata
-# extents.
-function get_extent_size(line, tmp) {
-       if (line ~ data_match || line ~ bg_match) {
-               split(line, tmp)
-               gsub(/\)/,"", tmp[6])
-               return tmp[6]
-       } else if (line ~ metadata_match) {
-               return nodesize
-       }
-}
-
-# given a 'item 2 key (13672448 EXTENT_ITEM 65536) itemoff 16153 itemsize 53'
-# and returns 13672448.
-function get_extent_offset(line, tmp) {
-       split(line, tmp)
-       gsub(/\(/,"",tmp[4])
-       return tmp[4]
-}
-
-# This function parses all the extents belonging to a particular block group
-# which are accumulated in lines[] and calculates the offsets of the holes
-# part of this block group.
-#
-# base_offset and bg_line are local variables
-function print_array(base_offset, bg_line)
-{
-       if (match(lines[0], bg_match)) {
-               # we don't have an extent at the beginning of of blockgroup, so we
-               # have a hole between blockgroup offset and first extent offset
-               bg_line = lines[0]
-               prev_size=0
-               prev_offset=get_extent_offset(bg_line)
-               delete lines[0]
-       } else {
-               # we have an extent at the beginning of block group, so initialize
-               # the prev_* vars correctly
-               bg_line = lines[1]
-               prev_size = get_extent_size(lines[0])
-               prev_offset = get_extent_offset(lines[0])
-               delete lines[1]
-               delete lines[0]
-       }
-
-       bg_offset=get_extent_offset(bg_line)
-       bgend=bg_offset + get_extent_size(bg_line)
-
-       for (i in lines) {
-                       cur_size = get_extent_size(lines[i])
-                       cur_offset = get_extent_offset(lines[i])
-                       if (cur_offset  != prev_offset + prev_size)
-                               print int((prev_size + prev_offset) / sectorsize), int((cur_offset-1) / sectorsize)
-                       prev_size = cur_size
-                       prev_offset = cur_offset
-       }
-
-       print int((prev_size + prev_offset) / sectorsize), int((bgend-1) / sectorsize)
-       total_printed++
-       delete lines
-}
-
-BEGIN {
-       loi_match="^.item [0-9]* key \\([0-9]* (BLOCK_GROUP_ITEM|METADATA_ITEM|EXTENT_ITEM) [0-9]*\\).*"
-       metadata_match="^.item [0-9]* key \\([0-9]* METADATA_ITEM [0-9]*\\).*"
-       data_match="^.item [0-9]* key \\([0-9]* EXTENT_ITEM [0-9]*\\).*"
-       bg_match="^.item [0-9]* key \\([0-9]* BLOCK_GROUP_ITEM [0-9]*\\).*"
-       node_match="^node.*$"
-       leaf_match="^leaf [0-9]* flags"
-       line_count=0
-       total_printed=0
-       skip_lines=0
-}
-
-{
-       # skip lines not belonging to a leaf
-       if (match($0, node_match)) {
-               skip_lines=1
-       } else if (match($0, leaf_match)) {
-               skip_lines=0
-       }
-
-       if (!match($0, loi_match) || skip_lines == 1) next;
-
-       # we have a line of interest, we need to parse it. First check if there is
-       # anything in the array
-       if (line_count==0) {
-               lines[line_count++]=$0;
-       } else {
-               prev_line=lines[line_count-1]
-               split(prev_line, prev_line_fields)
-               prev_objectid=prev_line_fields[4]
-               objectid=$4
-
-               if (objectid == prev_objectid && match($0, bg_match)) {
-                       if (total_printed>0) {
-                               # We are adding a BG after we have added its first extent
-                               # previously, consider this a record ending event and just print
-                               # the array
-
-                               delete lines[line_count-1]
-                               print_array()
-                               # we now start a new array with current and previous lines
-                               line_count=0
-                               lines[line_count++]=prev_line
-                               lines[line_count++]=$0
-                       } else {
-                               # first 2 added lines are EXTENT and BG that match, in this case
-                               # just add them
-                               lines[line_count++]=$0
-
-                       }
-               } else if (match($0, bg_match)) {
-                       # ordinary end of record
-                       print_array()
-                       line_count=0
-                       lines[line_count++]=$0
-               } else {
-                       lines[line_count++]=$0
-               }
-       }
-}
-
-END {
-       print_array()
-}
diff --git a/src/parse-free-space.py b/src/parse-free-space.py
new file mode 100755 (executable)
index 0000000..3c76171
--- /dev/null
@@ -0,0 +1,122 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Parse block group and extent tree output to create a free sector list.
+#
+# Usage:
+#  ./parse-free-space -n <nodesize> -b <bg_dump> -e <extent_dump>
+#
+# nodesize:     The nodesize of the btrfs
+# bg_dump:      The tree dump file that contains block group items
+#               If block-group-tree feature is enabled, it's block group tree dump.
+#               Otherwise it's extent tree dump
+# extent_dump:  The tree dump file that contains extent items
+#               Just the extent tree dump, requires all tree block and data have
+#               corresponding extent/metadata item.
+#
+# The output is "%d %d", the first one is the sector number (in 512 byte) of the
+# free space, the second one is the last sector number of the free range.
+
+import getopt
+import sys
+import re
+
+bg_match = "^.item [0-9]* key \\([0-9]* BLOCK_GROUP_ITEM [0-9]*\\).*"
+metadata_match="^.item [0-9]* key \\([0-9]* METADATA_ITEM [0-9]*\\).*"
+data_match="^.item [0-9]* key \\([0-9]* EXTENT_ITEM [0-9]*\\).*"
+
+def parse_block_groups(file_path):
+    bg_list = []
+    with open(file_path, 'r') as bg_file:
+        for line in bg_file:
+            match = re.search(bg_match, line)
+            if match == None:
+                continue
+            start = match.group(0).split()[3][1:]
+            length = match.group(0).split()[5][:-1]
+            bg_list.append({'start': int(start), 'length': int(length)})
+    return sorted(bg_list, key=lambda d: d['start'])
+
+def parse_extents(file_path):
+    extent_list = []
+    with open(file_path, 'r') as bg_file:
+        for line in bg_file:
+            match = re.search(data_match, line)
+            if match:
+                start = match.group(0).split()[3][1:]
+                length = match.group(0).split()[5][:-1]
+                extent_list.append({'start': int(start), 'length': int(length)})
+                continue
+            match = re.search(metadata_match, line)
+            if match:
+                start = match.group(0).split()[3][1:]
+                length = nodesize
+                extent_list.append({'start': int(start), 'length': int(length)})
+                continue
+    return sorted(extent_list, key=lambda d: d['start'])
+
+def range_end(range):
+    return range['start'] + range['length']
+
+def calc_free_spaces(bg_list, extent_list):
+    free_list = []
+    bg_iter = iter(bg_list)
+    cur_bg = next(bg_iter)
+    prev_end = cur_bg['start']
+
+    for cur_extent in extent_list:
+        # Finished one bg, add the remaining free space.
+        while range_end(cur_bg) <= cur_extent['start']:
+            if range_end(cur_bg) > prev_end:
+                free_list.append({'start': prev_end,
+                                  'length': range_end(cur_bg) - prev_end})
+            cur_bg = next(bg_iter)
+            prev_end = cur_bg['start']
+
+        if prev_end < cur_extent['start']:
+            free_list.append({'start': prev_end,
+                              'length': cur_extent['start'] - prev_end})
+        prev_end = range_end(cur_extent)
+
+    # Handle the remaining part in the bg
+    if range_end(cur_bg) > prev_end:
+        free_list.append({'start': prev_end,
+                          'length': range_end(cur_bg) - prev_end})
+
+    # Handle the remaining empty bgs (if any)
+    for cur_bg in bg_iter:
+        free_list.append({'start': cur_bg['start'],
+                          'length': cur_bg['length']})
+
+
+    return free_list
+
+nodesize = 0
+sectorsize = 512
+bg_file_path = ''
+extent_file_path = ''
+
+opts, args = getopt.getopt(sys.argv[1:], 's:n:b:e:')
+for o, a in opts:
+    if o == '-n':
+        nodesize = int(a)
+    elif o == '-b':
+        bg_file_path = a
+    elif o == '-e':
+        extent_file_path = a
+    elif o == '-s':
+        sectorsize = int(a)
+
+if nodesize == 0 or sectorsize == 0:
+    print("require -n <nodesize> and -s <sectorsize>")
+    sys.exit(1)
+if not bg_file_path or not extent_file_path:
+    print("require -b <bg_file> and -e <extent_file>")
+    sys.exit(1)
+
+bg_list = parse_block_groups(bg_file_path)
+extent_list = parse_extents(extent_file_path)
+free_space_list = calc_free_spaces(bg_list, extent_list)
+for free_space in free_space_list:
+    print(free_space['start'] >> 9,
+          (range_end(free_space) >> 9) - 1)
index 6f02b1cc35478219244093693c9bfa8297215362..4eb4252b8ba018c5cf7c310b3c17596e1bea5c93 100755 (executable)
@@ -93,13 +93,23 @@ get_free_sectors()
                        | sed -n 's/nodesize\s*\(.*\)/\1/p')
 
                # Get holes within block groups
-               $BTRFS_UTIL_PROG inspect-internal dump-tree -t extent $loop_dev \
-                       | $AWK_PROG -v sectorsize=512 -v nodesize=$nodesize -f $here/src/parse-extent-tree.awk
+               $BTRFS_UTIL_PROG inspect-internal dump-tree -t extent $loop_dev >> $tmp/extent_dump
+               if $BTRFS_UTIL_PROG inspect-internal dump-super $loop_dev |\
+                               grep -q "BLOCK_GROUP_TREE"; then
+                       $BTRFS_UTIL_PROG inspect-internal dump-tree -t block-group $loop_dev \
+                               >> $tmp/bg_dump
+               else
+                       cp $tmp/extent_dump $tmp/bg_dump
+               fi
+               $PYTHON3_PROG $here/src/parse-free-space.py -n $nodesize -b $tmp/bg_dump \
+                       -e $tmp/extent_dump >> $tmp/bg_free_space
 
                # Get holes within unallocated space on disk
                $BTRFS_UTIL_PROG inspect-internal dump-tree -t dev $loop_dev \
-                       | $AWK_PROG -v sectorsize=512 -v devsize=$device_size -f $here/src/parse-dev-tree.awk
+                       | $AWK_PROG -v sectorsize=512 -v devsize=$device_size \
+                         -f $here/src/parse-dev-tree.awk >> $tmp/unallocated
 
+               cat $tmp/bg_free_space $tmp/unallocated | sort
        ;;
        esac
 }
@@ -157,7 +167,13 @@ merged_sectors="$tmp/merged_free_sectors"
 mkdir $loop_mnt
 
 [ "$FSTYP" = "xfs" ] && MKFS_OPTIONS="-f $MKFS_OPTIONS"
-[ "$FSTYP" = "btrfs" ] && MKFS_OPTIONS="$MKFS_OPTIONS -f -dsingle -msingle"
+if [ "$FSTYP" = "btrfs" ]; then
+       # Only SINGLE chunks have their logical address 1:1 mapped
+       # to physical addresses.
+       MKFS_OPTIONS="$MKFS_OPTIONS -f -dsingle -msingle"
+       _require_command $PYTHON3_PROG python3
+fi
+
 
 _mkfs_dev $loop_dev
 _mount $loop_dev $loop_mnt