From: Darrick J. Wong Date: Thu, 7 May 2026 22:23:19 +0000 (-0700) Subject: generic/45[34]: add detection of confusable variation sequences X-Git-Tag: v2026.05.17~12 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f61ba0fde3ec9cdfd7eed711aca2b28e973c2905;p=xfstests-dev.git generic/45[34]: add detection of confusable variation sequences ArsTechnica recently wrote about a GitHub supply chain attack wherein non-rendering unicode sequences were embedded in javascript files to hide payloads that could be decrypted trivially later. While these are unlikely to appear in file and attribute names, xfs_scrub will warn about this sort of steganography, so let's make sure it works. Signed-off-by: "Darrick J. Wong" Reviewed-by: Zorro Lang Signed-off-by: Zorro Lang --- diff --git a/tests/generic/453 b/tests/generic/453 index bd5ce8b2..0193b010 100755 --- a/tests/generic/453 +++ b/tests/generic/453 @@ -233,6 +233,20 @@ setf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbc" "medium light" setf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbb" "light" setf "\xf0\x9f\xab\xb6" "neutral" +# confusion with variation selectors +setf "variations.txt" v0 +setf "varia\xef\xb8\x80tions.txt" v1 +setf "\xef\xb8\x80variations.txt" v2 +setf "vari\xef\xb8\x80\xef\xb8\x81ations.txt" v3 +setf "varia\xf3\xa0\x87\xa4tions.txt" v4 + +# deprecated tags are considered control characters +setf "tags_moocow.txt" u0 +setf "tags_m\xf3\xa0\x81\xadoocow.txt" u1 + +# totally hidden name? "(Hi)" is the file name +setf "\xf3\xa0\x80\xa8\xf3\xa0\x81\x88\xf3\xa0\x81\xa9\xf3\xa0\x80\xa9" "(Hi)" + ls -laR $testdir >> $seqres.full echo "Test files" @@ -331,6 +345,20 @@ testf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbc" "medium light" testf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbb" "light" testf "\xf0\x9f\xab\xb6" "neutral" +# confusion with variation selectors +testf "variations.txt" v0 +testf "varia\xef\xb8\x80tions.txt" v1 +testf "\xef\xb8\x80variations.txt" v2 +testf "vari\xef\xb8\x80\xef\xb8\x81ations.txt" v3 +testf "varia\xf3\xa0\x87\xa4tions.txt" v4 + +# deprecated tags are considered control characters +testf "tags_moocow.txt" u0 +testf "tags_m\xf3\xa0\x81\xadoocow.txt" u1 + +# totally hidden name? "(Hi)" is the file name +testf "\xf3\xa0\x80\xa8\xf3\xa0\x81\x88\xf3\xa0\x81\xa9\xf3\xa0\x80\xa9" "(Hi)" + echo "Uniqueness of inodes?" stat -c '%i' "${testdir}/"* | sort | uniq -c | while read nr inum; do if [ "${nr}" -gt 1 ]; then @@ -368,6 +396,13 @@ if _check_xfs_scrub_does_unicode "$SCRATCH_MNT" "$SCRATCH_DEV"; then grep -q "llamapirate" $tmp.scrub || echo "No complaints about hidden llm instructions in filenames?" fi + if grep -q "variations" $tmp.scrub; then + grep -q 'varia.xef.xb8' $tmp.scrub || echo "No complaints about variation sequence confusion?" + grep -q 'varia.xf3.xa0' $tmp.scrub || echo "No complaints about extended variation sequence confusion?" + grep -q 'x80variations' $tmp.scrub || echo "No complaints about variations starting a name?" + grep -q 'tags_m.xf3.xa0.x81' $tmp.scrub || echo "No complaints about deprecated unicode tags in a name?" + fi + echo "Actual xfs_scrub output:" >> $seqres.full cat $tmp.scrub >> $seqres.full fi diff --git a/tests/generic/454 b/tests/generic/454 index 9f6ddb4a..3454cae5 100755 --- a/tests/generic/454 +++ b/tests/generic/454 @@ -154,6 +154,20 @@ setf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbc" "medium light" setf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbb" "light" setf "\xf0\x9f\xab\xb6" "neutral" +# confusion with variation selectors +setf "variations.txt" v0 +setf "varia\xef\xb8\x80tions.txt" v1 +setf "\xef\xb8\x80variations.txt" v2 +setf "vari\xef\xb8\x80\xef\xb8\x81ations.txt" v3 +setf "varia\xf3\xa0\x87\xa4tions.txt" v4 + +# deprecated tags are considered control characters +setf "tags_moocow.txt" u0 +setf "tags_m\xf3\xa0\x81\xadoocow.txt" u1 + +# totally hidden name? "(Hi)" is the file name +setf "\xf3\xa0\x80\xa8\xf3\xa0\x81\x88\xf3\xa0\x81\xa9\xf3\xa0\x80\xa9" "(Hi)" + _getfattr --absolute-names -d "${testfile}" >> $seqres.full echo "Test files" @@ -229,6 +243,20 @@ testf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbc" "medium light" testf "\xf0\x9f\xab\xb6\xf0\x9f\x8f\xbb" "light" testf "\xf0\x9f\xab\xb6" "neutral" +# confusion with variation selectors +testf "variations.txt" v0 +testf "varia\xef\xb8\x80tions.txt" v1 +testf "\xef\xb8\x80variations.txt" v2 +testf "vari\xef\xb8\x80\xef\xb8\x81ations.txt" v3 +testf "varia\xf3\xa0\x87\xa4tions.txt" v4 + +# deprecated tags are considered control characters +testf "tags_moocow.txt" u0 +testf "tags_m\xf3\xa0\x81\xadoocow.txt" u1 + +# totally hidden name? "(Hi)" is the file name +testf "\xf3\xa0\x80\xa8\xf3\xa0\x81\x88\xf3\xa0\x81\xa9\xf3\xa0\x80\xa9" "(Hi)" + echo "Uniqueness of keys?" crazy_keys="$(_getfattr --absolute-names -d "${testfile}" | grep -E -c '(french_|chinese_|greek_|arabic_|urk)')" expected_keys=11 @@ -249,6 +277,14 @@ if _check_xfs_scrub_does_unicode "$SCRATCH_MNT" "$SCRATCH_DEV"; then grep -q "prohibition_" $tmp.scrub || echo "No complaints about prohibited sequence confusables?" grep -q "zerojoin_" $tmp.scrub || echo "No complaints about zero-width join confusables?" grep -q "llamapirate" $tmp.scrub || echo "No complaints about hidden llm instructions in filenames?" + + if grep -q "variations" $tmp.scrub; then + grep -q 'varia.xef.xb8' $tmp.scrub || echo "No complaints about variation sequence confusion?" + grep -q 'varia.xf3.xa0' $tmp.scrub || echo "No complaints about extended variation sequence confusion?" + grep -q 'x80variations' $tmp.scrub || echo "No complaints about variations starting a name?" + grep -q 'tags_m.xf3.xa0.x81' $tmp.scrub || echo "No complaints about deprecated unicode tags in a name?" + fi + echo "Actual xfs_scrub output:" >> $seqres.full echo "${output}" >> $seqres.full fi