summaryrefslogtreecommitdiff
path: root/t/t0028-working-tree-encoding.sh
blob: 82905a2156f7daad6cbdbd1ae5e54d9235f0cdbc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#!/bin/sh

test_description='working-tree-encoding conversion via gitattributes'

GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME

. ./test-lib.sh
. "$TEST_DIRECTORY/lib-encoding.sh"

GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING

test_expect_success 'setup test files' '
	git config core.eol lf &&

	text="hallo there!\ncan you read me?" &&
	echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
	echo "*.utf16lebom text working-tree-encoding=UTF-16LE-BOM" >>.gitattributes &&
	printf "$text" >test.utf8.raw &&
	printf "$text" | write_utf16 >test.utf16.raw &&
	printf "$text" | write_utf32 >test.utf32.raw &&
	printf "\377\376"                         >test.utf16lebom.raw &&
	printf "$text" | iconv -f UTF-8 -t UTF-16LE >>test.utf16lebom.raw &&

	# Line ending tests
	printf "one\ntwo\nthree\n" >lf.utf8.raw &&
	printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&

	# BOM tests
	printf "\0a\0b\0c"                         >nobom.utf16be.raw &&
	printf "a\0b\0c\0"                         >nobom.utf16le.raw &&
	printf "\376\377\0a\0b\0c"                 >bebom.utf16be.raw &&
	printf "\377\376a\0b\0c\0"                 >lebom.utf16le.raw &&
	printf "\0\0\0a\0\0\0b\0\0\0c"             >nobom.utf32be.raw &&
	printf "a\0\0\0b\0\0\0c\0\0\0"             >nobom.utf32le.raw &&
	printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
	printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&

	# Add only UTF-16 file, we will add the UTF-32 file later
	cp test.utf16.raw test.utf16 &&
	cp test.utf32.raw test.utf32 &&
	cp test.utf16lebom.raw test.utf16lebom &&
	git add .gitattributes test.utf16 test.utf16lebom &&
	git commit -m initial
'

test_expect_success 'ensure UTF-8 is stored in Git' '
	test_when_finished "rm -f test.utf16.git" &&

	git cat-file -p :test.utf16 >test.utf16.git &&
	test_cmp_bin test.utf8.raw test.utf16.git
'

test_expect_success 're-encode to UTF-16 on checkout' '
	test_when_finished "rm -f test.utf16.raw" &&

	rm test.utf16 &&
	git checkout test.utf16 &&
	test_cmp_bin test.utf16.raw test.utf16
'

test_expect_success 're-encode to UTF-16-LE-BOM on checkout' '
	rm test.utf16lebom &&
	git checkout test.utf16lebom &&
	test_cmp_bin test.utf16lebom.raw test.utf16lebom
'

test_expect_success 'check $GIT_DIR/info/attributes support' '
	test_when_finished "rm -f test.utf32.git" &&
	test_when_finished "git reset --hard HEAD" &&

	echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
	git add test.utf32 &&

	git cat-file -p :test.utf32 >test.utf32.git &&
	test_cmp_bin test.utf8.raw test.utf32.git
'

for i in 16 32
do
	test_expect_success "check prohibited UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
		echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&

		# Here we add a UTF-16 (resp. UTF-32) files with BOM (big/little-endian)
		# but we tell Git to treat it as UTF-16BE/UTF-16LE (resp. UTF-32).
		# In these cases the BOM is prohibited.
		cp bebom.utf${i}be.raw bebom.utf${i}be &&
		test_must_fail git add bebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}be &&
		test_must_fail git add lebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp bebom.utf${i}be.raw bebom.utf${i}le &&
		test_must_fail git add bebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}le &&
		test_must_fail git add lebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
	'

	test_expect_success "check required UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&

		cp nobom.utf${i}be.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&

		cp nobom.utf${i}le.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
	'

	test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
		test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
		test_when_finished "git reset --hard HEAD^" &&

		cat lf.utf8.raw | write_utf${i} >lf.utf${i}.raw &&
		cat crlf.utf8.raw | write_utf${i} >crlf.utf${i}.raw &&
		cp crlf.utf${i}.raw eol.utf${i} &&

		cat >expectIndexLF <<-EOF &&
			i/lf    w/-text attr/text             	eol.utf${i}
		EOF

		git add eol.utf${i} &&
		git commit -m eol &&

		# UTF-${i} with CRLF (Windows line endings)
		rm eol.utf${i} &&
		git -c core.eol=crlf checkout eol.utf${i} &&
		test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&

		# Although the file has CRLF in the working tree,
		# ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual &&

		# UTF-${i} with LF (Unix line endings)
		rm eol.utf${i} &&
		git -c core.eol=lf checkout eol.utf${i} &&
		test_cmp_bin lf.utf${i}.raw eol.utf${i} &&

		# The file LF in the working tree, ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual
	'
done

test_expect_success 'check unsupported encodings' '
	test_when_finished "git reset --hard HEAD" &&

	echo "*.set text working-tree-encoding" >.gitattributes &&
	printf "set" >t.set &&
	test_must_fail git add t.set 2>err.out &&
	test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&

	echo "*.unset text -working-tree-encoding" >.gitattributes &&
	printf "unset" >t.unset &&
	git add t.unset &&

	echo "*.empty text working-tree-encoding=" >.gitattributes &&
	printf "empty" >t.empty &&
	git add t.empty &&

	echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
	printf "garbage" >t.garbage &&
	test_must_fail git add t.garbage 2>err.out &&
	test_i18ngrep "failed to encode" err.out
'

test_expect_success 'error if encoding round trip is not the same during refresh' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Add and commit a UTF-16 file but skip the "working-tree-encoding"
	# filter. Consequently, the in-repo representation is UTF-16 and not
	# UTF-8. This simulates a Git version that has no working tree encoding
	# support.
	echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
	echo "hallo" >nonsense.utf16le &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&
	COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
	git update-ref refs/heads/main $COMMIT &&

	test_must_fail git checkout HEAD^ 2>err.out &&
	test_i18ngrep "error: .* overwritten by checkout:" err.out
'

test_expect_success 'error if encoding garbage is already in Git' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Skip the UTF-16 filter for the added file
	# This simulates a Git version that has no checkoutEncoding support
	cp nobom.utf16be.raw nonsense.utf16 &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
	COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
	git update-ref refs/heads/main $COMMIT &&

	git diff 2>err.out &&
	test_i18ngrep "error: BOM is required" err.out
'

test_lazy_prereq ICONV_SHIFT_JIS '
	iconv -f UTF-8 -t SHIFT-JIS </dev/null
'

test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
	test_when_finished "rm -f roundtrip.shift roundtrip.utf16" &&
	test_when_finished "git reset --hard HEAD" &&

	text="hallo there!\nroundtrip test here!" &&
	printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
	printf "$text" | write_utf16 >roundtrip.utf16 &&
	echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&

	# SHIFT-JIS encoded files are round-trip checked by default...
	GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# ... unless we overwrite the Git config!
	! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
		add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# UTF-16 encoded files should not be round-trip checked by default...
	! GIT_TRACE=1 git add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for UTF-16" &&
	git reset &&

	# ... unless we tell Git to check it!
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# ... unless we tell Git to check it!
	# (here we also check that the casing of the encoding is irrelevant)
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset
'

# $1: checkout encoding
# $2: test string
# $3: binary test string in checkout encoding
test_commit_utf8_checkout_other () {
	encoding="$1"
	orig_string="$2"
	expect_bytes="$3"

	test_expect_success "Commit UTF-8, checkout $encoding" '
		test_when_finished "git checkout HEAD -- .gitattributes" &&

		test_ext="commit_utf8_checkout_$encoding" &&
		test_file="test.$test_ext" &&

		# Commit as UTF-8
		echo "*.$test_ext text working-tree-encoding=UTF-8" >.gitattributes &&
		printf "$orig_string" >$test_file &&
		git add $test_file &&
		git commit -m "Test data" &&

		# Checkout in tested encoding
		rm $test_file &&
		echo "*.$test_ext text working-tree-encoding=$encoding" >.gitattributes &&
		git checkout HEAD -- $test_file &&

		# Test
		printf $expect_bytes >$test_file.raw &&
		test_cmp_bin $test_file.raw $test_file
	'
}

test_commit_utf8_checkout_other "UTF-8"        "Test Тест" "\124\145\163\164\040\320\242\320\265\321\201\321\202"
test_commit_utf8_checkout_other "UTF-16LE"     "Test Тест" "\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
test_commit_utf8_checkout_other "UTF-16BE"     "Test Тест" "\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
test_commit_utf8_checkout_other "UTF-16LE-BOM" "Test Тест" "\377\376\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
test_commit_utf8_checkout_other "UTF-16BE-BOM" "Test Тест" "\376\377\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
test_commit_utf8_checkout_other "UTF-32LE"     "Test Тест" "\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\000\042\004\000\000\065\004\000\000\101\004\000\000\102\004\000\000"
test_commit_utf8_checkout_other "UTF-32BE"     "Test Тест" "\000\000\000\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\004\042\000\000\004\065\000\000\004\101\000\000\004\102"

test_done