git-server-git.apps.pok.os.sepia.ceph.com Git - xfsprogs-dev.git/commitdiff
xfs_scrub: warn about unicode variation selectors in names
author: Darrick J. Wong <djwong@kernel.org>
Mon, 13 Apr 2026 14:57:00 +0000 (07:57 -0700)
committer: Andrey Albershteyn <aalbersh@kernel.org>
Fri, 17 Apr 2026 10:19:11 +0000 (12:19 +0200)
Ars Technica recently wrote about a GitHub supply chain attack wherein
non-rendering Unicode sequences were embedded in JavaScript files to
hide payloads that could be decrypted trivially later.  While these are
unlikely to appear in file and attribute names, we should warn about
this sort of steganography.  Flag any name that starts with a variation
selector or contains more than one variation selector in a row.

Link: https://arstechnica.com/security/2026/03/supply-chain-attack-using-invisible-code-hits-github-and-other-repositories/
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
scrub/unicrash.c

index b83bef644b6dceb5ec6925210e29b3d4b68cd8c3..75493c5ee795da3c57f356eda17d71121dd94fea 100644 (file)
@@ -127,6 +127,9 @@ struct unicrash {
 /* Possible phony file extension. */
 #define UNICRASH_PHONY_EXTENSION ((__force badname_t)(1U << 6))
 
+/* Variation selector at the start of a name, or more than one in a row. */
+#define UNICRASH_VARIATION_RUN ((__force badname_t)(1U << 7))
+
 /* FULL STOP (aka period), 0x2E */
 #define UCHAR_PERIOD           ((UChar32)'.')
 
@@ -501,9 +504,14 @@ name_entry_examine(
        UChar32                 uchr;
        uint8_t                 mask = 0;
        unsigned int            ret = 0;
+       /* Start out true so that a leading variation selector is flagged */
+       UBool                   was_variation = true;
 
        uiter_setString(&uiter, entry->normstr, entry->normstrlen);
        while ((uchr = uiter_next32(&uiter)) != U_SENTINEL) {
+               UBool           is_variation =
+                       u_hasBinaryProperty(uchr, UCHAR_VARIATION_SELECTOR);
+
                /* characters are invisible */
                if (is_nonrendering(uchr))
                        ret |= UNICRASH_INVISIBLE;
@@ -534,6 +542,11 @@ name_entry_examine(
                default:
                        break;
                }
+
+               if (is_variation && was_variation)
+                       ret |= UNICRASH_VARIATION_RUN;
+
+               was_variation = is_variation;
        }
 
        /* mixing left-to-right and right-to-left chars */
@@ -835,6 +848,18 @@ _("Unicode name \"%s\" in %s contains control characters."),
                goto out;
        }
 
+       /*
+        * Variation codepoints only apply to the previous non-variation
+        * codepoint.  Seeing multiple in a row or at the start of a name is
+        * weird.
+        */
+       if (badflags & UNICRASH_VARIATION_RUN) {
+               str_warn(uc->ctx, descr_render(dsc),
+_("Unicode name \"%s\" in %s contains a weird sequence of variation selectors."),
+                               bad1, what);
+               goto out;
+       }
+
        /*
         * Skip the informational messages if the inode owning the name is
         * only writeable by root, because those files were put there by the