From: Darrick J. Wong Date: Mon, 13 Apr 2026 14:57:00 +0000 (-0700) Subject: xfs_scrub: warn about unicode variation selectors in names X-Git-Tag: v7.0.0~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=95329f9fa13040962c5a2a5e91a29ba215eb341f;p=xfsprogs-dev.git xfs_scrub: warn about unicode variation selectors in names Ars Technica recently wrote about a GitHub supply chain attack wherein non-rendering Unicode sequences were embedded in JavaScript files to hide payloads that could be decrypted trivially later. While such sequences are unlikely to appear in file and attribute names, we should warn about this sort of steganography. Link: https://arstechnica.com/security/2026/03/supply-chain-attack-using-invisible-code-hits-github-and-other-repositories/ Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- diff --git a/scrub/unicrash.c b/scrub/unicrash.c index b83bef64..75493c5e 100644 --- a/scrub/unicrash.c +++ b/scrub/unicrash.c @@ -127,6 +127,9 @@ struct unicrash { /* Possible phony file extension. */ #define UNICRASH_PHONY_EXTENSION ((__force badname_t)(1U << 6)) +/* More than one variation selector in a row. 
*/ +#define UNICRASH_VARIATION_RUN ((__force badname_t)(1U << 7)) + /* FULL STOP (aka period), 0x2E */ #define UCHAR_PERIOD ((UChar32)'.') @@ -501,9 +504,14 @@ name_entry_examine( UChar32 uchr; uint8_t mask = 0; unsigned int ret = 0; + /* Don't allow the first codepoint to be a variation */ + UBool was_variation = true; uiter_setString(&uiter, entry->normstr, entry->normstrlen); while ((uchr = uiter_next32(&uiter)) != U_SENTINEL) { + UBool is_variation = + u_hasBinaryProperty(uchr, UCHAR_VARIATION_SELECTOR); + /* characters are invisible */ if (is_nonrendering(uchr)) ret |= UNICRASH_INVISIBLE; @@ -534,6 +542,11 @@ name_entry_examine( default: break; } + + if (is_variation && was_variation) + ret |= UNICRASH_VARIATION_RUN; + + was_variation = is_variation; } /* mixing left-to-right and right-to-left chars */ @@ -835,6 +848,18 @@ _("Unicode name \"%s\" in %s contains control characters."), goto out; } + /* + * Variation codepoints only apply to the previous non-variation + * codepoint. Seeing multiple in a row or at the start of a name is + * weird. + */ + if (badflags & UNICRASH_VARIATION_RUN) { + str_warn(uc->ctx, descr_render(dsc), +_("Unicode name \"%s\" in %s contains a weird sequence of variation selectors."), + bad1, what); + goto out; + } + /* * Skip the informational messages if the inode owning the name is * only writeable by root, because those files were put there by the