2 # SPDX-License-Identifier: GPL-2.0
3 # Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
7 # Create a directory with multiple filenames that all appear the same
8 # (in unicode, anyway) but point to different inodes. In theory all
9 # Linux filesystems should allow this (filenames are a sequence of
10 # arbitrary bytes) even if the user implications are horrifying.
# Path prefix for this test's result files; "$seqres.full" is used below as
# the verbose log.
13 seqres="$RESULT_DIR/$seq"
14 echo "QA output created by $seq"
# Start out as failed. The \$ in the trap string defers expansion, so the trap
# exits with whatever value status holds when it fires.
18 status=1 # failure is the default!
# Run _cleanup (defined outside the visible lines) and exit with $status on
# shell exit (0) and on signals 1, 2, 3 and 15.
19 trap "_cleanup; exit \$status" 0 1 2 3 15
26 # get standard environment, filters and checks
# The _scratch_* helpers are defined elsewhere; going by their names and the
# message below they format and mount the scratch device. Their output is sent
# to the log only: the first redirect truncates it, the second appends.
32 echo "Format and mount"
33 _scratch_mkfs > $seqres.full 2>&1
34 _scratch_mount >> $seqres.full 2>&1
# Directory on the scratch mount under which all test entries are created.
36 testdir="${SCRATCH_MNT}/test-${seq}"
# Print the bytes of $1 as space-separated hex pairs on a single line:
# od -tx1 dumps each byte in hex, -w99999 keeps the dump on one row,
# head -n1 drops od's trailing offset line and sed strips the leading offset.
40 echo -n "$1" | od -tx1 -w99999 | head -n1 | sed -e 's/^0* //g'
# Create a regular file named ${key} under ${testdir} containing ${value}
# (echo appends a newline), then log the name, its hex bytes and the value.
# NOTE(review): key and value are assigned on lines not visible here --
# presumably from $1/$2 with the \x escapes decoded; confirm.
47 echo "${value}" > "${testdir}/${key}"
48 echo "Storing ${key} ($(hexbytes "${key}")) -> ${value}" >> $seqres.full
# Create a directory named ${key} under ${testdir} and store ${value} in a
# file called "value" inside it, then log the name, its hex bytes and the value.
# NOTE(review): key and value are assigned on lines not visible here --
# presumably from $1/$2 with the \x escapes decoded; confirm.
55 mkdir -p "${testdir}/${key}"
56 echo "${value}" > "${testdir}/${key}/value"
57 echo "Storing ${key} ($(hexbytes "${key}")) -> ${value}" >> $seqres.full
# Verify that the regular file named ${key} still exists under ${testdir} and
# still holds ${value}. Problems are reported on stdout; progress goes to the log.
# NOTE(review): key and value are assigned on lines not visible here.
63 fname="${testdir}/${key}"
65 echo "Testing ${key} ($(hexbytes "${key}")) -> ${value}" >> $seqres.full
# Complain if nothing can be found under this exact name.
67 if [ ! -e "${fname}" ]; then
68 echo "Key ${key} does not exist for ${value} test??"
# Compare the on-disk contents with the value the caller expects.
72 actual_value="$(cat "${fname}")"
73 if [ "${actual_value}" != "${value}" ]; then
# Report the on-disk contents as "has value" and the caller's value as
# "expected" (the two were previously swapped in this message).
74 echo "Key ${key} has value ${actual_value}, expected ${value}."
# Verify that the directory named ${key} still exists under ${testdir} and
# that its "value" file still holds ${value}. Problems are reported on stdout;
# progress goes to the log.
# NOTE(review): key and value are assigned on lines not visible here.
81 fname="${testdir}/${key}/value"
83 echo "Testing ${key} ($(hexbytes "${key}")) -> ${value}" >> $seqres.full
# Complain if nothing can be found under this exact name.
85 if [ ! -e "${fname}" ]; then
86 echo "Key ${key} does not exist for ${value} test??"
# Compare the on-disk contents with the value the caller expects.
90 actual_value="$(cat "${fname}")"
91 if [ "${actual_value}" != "${value}" ]; then
# Report the on-disk contents as "has value" and the caller's value as
# "expected" (the two were previously swapped in this message).
92 echo "Key ${key} has value ${actual_value}, expected ${value}."
# Filter stdin: keep only lines containing 'Unicode' and, on each of them,
# delete everything that precedes the word "Duplicate" (lines without that
# word pass through unchanged).
97 grep 'Unicode' | sed -e 's/^.*Duplicate/Duplicate/g'
# NOTE(review): the code-point notes below assume setf decodes the \x escapes
# into raw bytes before creating the file; confirm against setf.
101 # These two render the same
# c3 a9 is precomposed U+00E9; "e" + cc 81 is "e" followed by U+0301.
102 setf "french_caf\xc3\xa9.txt" "NFC"
103 setf "french_cafe\xcc\x81.txt" "NFD"
105 # These two may have different widths
# ef bd b6 is U+FF76 (halfwidth form); e3 82 ab is U+30AB.
106 setf "chinese_\xef\xbd\xb6.txt" "NFKC1"
107 setf "chinese_\xe3\x82\xab.txt" "NFKC2"
109 # Same point, different byte representations in NFC/NFD/NFKC/NFKD
# U+03D3; U+03D2 + U+0301; U+038E; U+03A5 + U+0301.
110 setf "greek_\xcf\x93.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFC"
111 setf "greek_\xcf\x92\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFD"
112 setf "greek_\xce\x8e.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKC"
113 setf "greek_\xce\xa5\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKD"
115 # Arabic code point can expand into a muuuch longer series
# ef b7 ba is the single code point U+FDFA; the second name spells it out.
116 setf "arabic_\xef\xb7\xba.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFC"
117 setf "arabic_\xd8\xb5\xd9\x84\xd9\x89\x20\xd8\xa7\xd9\x84\xd9\x84\xd9\x87\x20\xd8\xb9\xd9\x84\xd9\x8a\xd9\x87\x20\xd9\x88\xd8\xb3\xd9\x84\xd9\x85.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFKC"
# c0 af is the overlong (invalid) two-byte UTF-8 encoding of "/" (U+002F);
# as raw bytes it is not a path separator.
120 setf "urk\xc0\xafmoo" "FAKESLASH"
122 # Emoji: octopus butterfly owl giraffe
123 setf "emoji_\xf0\x9f\xa6\x91\xf0\x9f\xa6\x8b\xf0\x9f\xa6\x89\xf0\x9f\xa6\x92.txt" "octopus butterfly owl giraffe emoji"
125 # Line draw characters, because why not?
126 setf "\x6c\x69\x6e\x65\x64\x72\x61\x77\x5f\x0a\xe2\x95\x94\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x97\x0a\xe2\x95\x91\x20\x6d\x65\x74\x61\x74\x61\x62\x6c\x65\x20\xe2\x95\x91\x0a\xe2\x95\x9f\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x95\xa2\x0a\xe2\x95\x91\x20\x5f\x5f\x69\x6e\x64\x65\x78\x20\x20\x20\xe2\x95\x91\x0a\xe2\x95\x9a\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x9d\x0a.txt" "ugly box because we can"
# NOTE(review): the code-point notes below assume setf/setd decode the \x
# escapes into raw bytes before creating the entry; confirm against setf/setd.
128 # unicode rtl widgets too...
# e2 80 ae is U+202E RIGHT-TO-LEFT OVERRIDE: "moo" + override + "gnp.txt" is
# meant to display like the plain-ASCII "mootxt.png" created next.
129 setf "moo\xe2\x80\xaegnp.txt" "Well say hello,"
130 setf "mootxt.png" "Harvey"
132 # mixed-script confusables
# ce bf is U+03BF GREEK SMALL LETTER OMICRON in place of Latin "o".
133 setf "mixed_t\xce\xbfp.txt" "greek omicron instead of o"
134 setf "mixed_top.txt" "greek omicron instead of o"
136 # single-script spoofing
# e2 80 90 is U+2010 HYPHEN versus the ASCII hyphen-minus.
137 setf "hyphens_a\xe2\x80\x90b.txt" "hyphens"
138 setf "hyphens_a-b.txt" "hyphens"
# ca a3 is U+02A3 LATIN SMALL LETTER DZ DIGRAPH versus the two letters "dz".
140 setf "dz_digraph_dze.txt" "d-z digraph"
141 setf "dz_digraph_\xca\xa3e.txt" "d-z digraph"
143 # inadequate rendering
144 setf "inadequate_al.txt" "is it l or is it 1"
145 setf "inadequate_a1.txt" "is it l or is it 1"
# e2 82 a8 is U+20A8 RUPEE SIGN versus the two letters "Rs".
148 setf "prohibition_Rs.txt" "rupee symbol"
149 setf "prohibition_\xe2\x82\xa8.txt" "rupee symbol"
# e2 80 8d is U+200D ZERO WIDTH JOINER placed inside "moocow".
152 setf "zerojoin_moocow.txt" "zero width joiners"
153 setf "zerojoin_moo\xe2\x80\x8dcow.txt" "zero width joiners"
# The same three code points (U+101C, U+102D, U+102F) with the last two swapped.
156 setf "combmark_\xe1\x80\x9c\xe1\x80\xad\xe1\x80\xaf.txt" "combining marks"
157 setf "combmark_\xe1\x80\x9c\xe1\x80\xaf\xe1\x80\xad.txt" "combining marks"
# Directories (setd) whose names are "." and ".." followed by U+200D.
160 setd ".\xe2\x80\x8d" "zero width joiners in dot entry"
161 setd "..\xe2\x80\x8d" "zero width joiners in dotdot entry"
# Record a long listing of the test directory in the log. The path is quoted,
# as in every other use of ${testdir} in this file, so a mount point containing
# whitespace or glob characters is passed to ls as a single argument.
163 ls -la "${testdir}" >> $seqres.full
# Read every file back by its exact byte sequence and check that it still
# holds the value stored for it; arguments mirror the setf calls one for one.
# Composed vs. decomposed accent.
166 testf "french_caf\xc3\xa9.txt" "NFC"
167 testf "french_cafe\xcc\x81.txt" "NFD"
# Halfwidth vs. regular form.
169 testf "chinese_\xef\xbd\xb6.txt" "NFKC1"
170 testf "chinese_\xe3\x82\xab.txt" "NFKC2"
# One symbol in its NFC/NFD/NFKC/NFKD byte representations.
172 testf "greek_\xcf\x93.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFC"
173 testf "greek_\xcf\x92\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFD"
174 testf "greek_\xce\x8e.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKC"
175 testf "greek_\xce\xa5\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKD"
# Single ligature code point vs. its long expansion.
177 testf "arabic_\xef\xb7\xba.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFC"
178 testf "arabic_\xd8\xb5\xd9\x84\xd9\x89\x20\xd8\xa7\xd9\x84\xd9\x84\xd9\x87\x20\xd8\xb9\xd9\x84\xd9\x8a\xd9\x87\x20\xd9\x88\xd8\xb3\xd9\x84\xd9\x85.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFKC"
# Overlong two-byte encoding of "/".
180 testf "urk\xc0\xafmoo" "FAKESLASH"
# Four-byte emoji sequences.
182 testf "emoji_\xf0\x9f\xa6\x91\xf0\x9f\xa6\x8b\xf0\x9f\xa6\x89\xf0\x9f\xa6\x92.txt" "octopus butterfly owl giraffe emoji"
184 testf "\x6c\x69\x6e\x65\x64\x72\x61\x77\x5f\x0a\xe2\x95\x94\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x97\x0a\xe2\x95\x91\x20\x6d\x65\x74\x61\x74\x61\x62\x6c\x65\x20\xe2\x95\x91\x0a\xe2\x95\x9f\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x95\xa2\x0a\xe2\x95\x91\x20\x5f\x5f\x69\x6e\x64\x65\x78\x20\x20\x20\xe2\x95\x91\x0a\xe2\x95\x9a\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x9d\x0a.txt" "ugly box because we can"
# Read the remaining entries back by their exact byte sequences; arguments
# mirror the setf/setd calls one for one.
# Right-to-left override vs. plain ASCII look-alike.
186 testf "moo\xe2\x80\xaegnp.txt" "Well say hello,"
187 testf "mootxt.png" "Harvey"
# Mixed-script confusables.
189 testf "mixed_t\xce\xbfp.txt" "greek omicron instead of o"
190 testf "mixed_top.txt" "greek omicron instead of o"
# Single-script spoofing.
192 testf "hyphens_a\xe2\x80\x90b.txt" "hyphens"
193 testf "hyphens_a-b.txt" "hyphens"
195 testf "dz_digraph_dze.txt" "d-z digraph"
196 testf "dz_digraph_\xca\xa3e.txt" "d-z digraph"
# Inadequate rendering.
198 testf "inadequate_al.txt" "is it l or is it 1"
199 testf "inadequate_a1.txt" "is it l or is it 1"
# Rupee sign vs. the letters "Rs".
201 testf "prohibition_Rs.txt" "rupee symbol"
202 testf "prohibition_\xe2\x82\xa8.txt" "rupee symbol"
# Zero width joiner inside a name.
204 testf "zerojoin_moocow.txt" "zero width joiners"
205 testf "zerojoin_moo\xe2\x80\x8dcow.txt" "zero width joiners"
# Reordered combining marks.
207 testf "combmark_\xe1\x80\x9c\xe1\x80\xad\xe1\x80\xaf.txt" "combining marks"
208 testf "combmark_\xe1\x80\x9c\xe1\x80\xaf\xe1\x80\xad.txt" "combining marks"
# Directories named "." / ".." plus a zero width joiner (checked via testd).
210 testd ".\xe2\x80\x8d" "zero width joiners in dot entry"
211 testd "..\xe2\x80\x8d" "zero width joiners in dotdot entry"
# Check that no two names share an inode: stat prints one inode number per
# entry, and uniq -c prefixes each distinct number with its occurrence count,
# which the loop reads as ${nr} (count) and ${inum} (inode number).
# NOTE(review): a bare * does not match names starting with "." unless dotglob
# is enabled elsewhere, so the two setd directories look excluded -- confirm.
213 echo "Uniqueness of inodes?"
214 stat -c '%i' "${testdir}/"* | sort | uniq -c | while read nr inum; do
# A count above 1 means more than one name resolved to the same inode.
215 if [ "${nr}" -gt 1 ]; then
220 echo "Test XFS online scrub, if applicable"
222 # Only run this on xfs if xfs_scrub is available and has the unicode checker
# Return 1 unless the filesystem type under test is xfs ...
224 [ "$FSTYP" == "xfs" ] || return 1
# ... and the _supports_xfs_scrub helper (defined elsewhere) succeeds for the
# scratch mount point and device.
225 _supports_xfs_scrub "$SCRATCH_MNT" "$SCRATCH_DEV" || return 1
227 # We only care if xfs_scrub has unicode string support...
# The condition holds when ldd is unavailable or when ldd's output for the
# scrub binary does not mention libicui18n. The branch body is not visible
# here; presumably it returns non-zero -- confirm.
228 if ! type ldd > /dev/null 2>&1 || \
229 ! ldd "${XFS_SCRUB_PROG}" | grep -q libicui18n; then
# Run the scrub checks only when check_xfs_scrub succeeds.
236 if check_xfs_scrub; then
# Capture stdout and stderr of xfs_scrub, run with -v -n under the C.UTF-8
# locale against the scratch mount, reduced by filter_scrub to its
# Unicode-related lines.
# NOTE(review): -n is presumably xfs_scrub's "do not modify" mode; confirm
# against xfs_scrub(8).
237 output="$(LC_ALL="C.UTF-8" ${XFS_SCRUB_PROG} -v -n "${SCRATCH_MNT}" 2>&1 | filter_scrub)"
# Each name prefix below must appear at least once in the filtered output;
# otherwise a complaint is printed on stdout. The chinese_, emoji_ and
# combmark_ prefixes are not checked here.
238 echo "${output}" | grep -q "french_" || echo "No complaints about french e accent?"
239 echo "${output}" | grep -q "greek_" || echo "No complaints about greek letter mess?"
240 echo "${output}" | grep -q "arabic_" || echo "No complaints about arabic expanded string?"
241 echo "${output}" | grep -q "mixed_" || echo "No complaints about mixed script confusables?"
242 echo "${output}" | grep -q "hyphens_" || echo "No complaints about hyphenation confusables?"
243 echo "${output}" | grep -q "dz_digraph_" || echo "No complaints about single script confusables?"
244 echo "${output}" | grep -q "inadequate_" || echo "No complaints about inadequate rendering confusables?"
245 echo "${output}" | grep -q "prohibition_" || echo "No complaints about prohibited sequence confusables?"
246 echo "${output}" | grep -q "zerojoin_" || echo "No complaints about zero-width join confusables?"
# Keep the filtered scrub output in the log for debugging.
247 echo "Actual xfs_scrub output:" >> $seqres.full
248 echo "${output}" >> $seqres.full