diff options
Diffstat (limited to 't')
-rwxr-xr-x | t/t4210-log-i18n.sh | 41 | ||||
-rwxr-xr-x | t/t7008-filter-branch-null-sha1.sh (renamed from t/t7009-filter-branch-null-sha1.sh) | 0 | ||||
-rwxr-xr-x | t/t7812-grep-icase-non-ascii.sh | 28 | ||||
-rwxr-xr-x | t/t7815-grep-binary.sh (renamed from t/t7008-grep-binary.sh) | 101 | ||||
-rwxr-xr-x | t/t7816-grep-binary-pattern.sh | 127 |
5 files changed, 194 insertions, 103 deletions
diff --git a/t/t4210-log-i18n.sh b/t/t4210-log-i18n.sh index 7c519436ef..6e61f57f09 100755 --- a/t/t4210-log-i18n.sh +++ b/t/t4210-log-i18n.sh @@ -1,12 +1,15 @@ #!/bin/sh test_description='test log with i18n features' -. ./test-lib.sh +. ./lib-gettext.sh # two forms of é utf8_e=$(printf '\303\251') latin1_e=$(printf '\351') +# invalid UTF-8 +invalid_e=$(printf '\303\50)') # ")" at end to close opening "(" + test_expect_success 'create commits in different encodings' ' test_tick && cat >msg <<-EOF && @@ -48,9 +51,43 @@ test_expect_success !MINGW 'log --grep does not find non-reencoded values (utf8) test_must_be_empty actual ' -test_expect_success 'log --grep does not find non-reencoded values (latin1)' ' +test_expect_success !MINGW 'log --grep does not find non-reencoded values (latin1)' ' git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual && test_must_be_empty actual ' +for engine in fixed basic extended perl +do + prereq= + if test $engine = "perl" + then + prereq="PCRE" + else + prereq="" + fi + force_regex= + if test $engine != "fixed" + then + force_regex=.* + fi + test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " + cat >expect <<-\EOF && + latin1 + utf8 + EOF + LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$latin1_e\" >actual && + test_cmp expect actual + " + + test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " + LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$utf8_e\" >actual && + test_must_be_empty actual + " + + test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " + LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$invalid_e\" >actual && + test_must_be_empty actual + " +done + test_done diff --git a/t/t7009-filter-branch-null-sha1.sh b/t/t7008-filter-branch-null-sha1.sh index 9ba9f24ad2..9ba9f24ad2 100755 --- a/t/t7009-filter-branch-null-sha1.sh +++ b/t/t7008-filter-branch-null-sha1.sh diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 0c685d3598..531eb59d57 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -53,4 +53,32 @@ test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' ' test_cmp expected actual ' +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: setup invalid UTF-8 data' ' + printf "\\200\\n" >invalid-0x80 && + echo "ævar" >expected && + cat expected >>invalid-0x80 && + git add invalid-0x80 +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep ASCII from invalid UTF-8 data' ' + git grep -h "var" invalid-0x80 >actual && + test_cmp expected actual && + git grep -h "(*NO_JIT)var" invalid-0x80 >actual && + test_cmp expected actual +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data' ' + git grep -h "æ" invalid-0x80 >actual && + test_cmp expected actual && + git grep -h "(*NO_JIT)æ" invalid-0x80 && + test_cmp expected actual +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data with -i' ' + test_might_fail git grep -hi "Æ" invalid-0x80 >actual && + test_cmp expected actual && + test_must_fail git grep -hi "(*NO_JIT)Æ" invalid-0x80 && + test_cmp expected actual +' + test_done diff --git a/t/t7008-grep-binary.sh b/t/t7815-grep-binary.sh index 2d87c49b75..90ebb64f46 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7815-grep-binary.sh @@ -4,41 +4,6 @@ test_description='git grep in binary files' . ./test-lib.sh -nul_match () { - matches=$1 - flags=$2 - pattern=$3 - pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g') - - if test "$matches" = 1 - then - test_expect_success "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - git grep -f f $flags a - " - elif test "$matches" = 0 - then - test_expect_success "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - test_must_fail git grep -f f $flags a - " - elif test "$matches" = T1 - then - test_expect_failure "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - git grep -f f $flags a - " - elif test "$matches" = T0 - then - test_expect_failure "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - test_must_fail git grep -f f $flags a - " - else - test_expect_success "PANIC: Test framework error. Unknown matches value $matches" 'false' - fi -} - test_expect_success 'setup' " echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a && git add a && @@ -102,72 +67,6 @@ test_expect_failure 'git grep .fi a' ' git grep .fi a ' -nul_match 1 '-F' 'yQf' -nul_match 0 '-F' 'yQx' -nul_match 1 '-Fi' 'YQf' -nul_match 0 '-Fi' 'YQx' -nul_match 1 '' 'yQf' -nul_match 0 '' 'yQx' -nul_match 1 '' 'æQð' -nul_match 1 '-F' 'eQm[*]c' -nul_match 1 '-Fi' 'EQM[*]C' - -# Regex patterns that would match but shouldn't with -F -nul_match 0 '-F' 'yQ[f]' -nul_match 0 '-F' '[y]Qf' -nul_match 0 '-Fi' 'YQ[F]' -nul_match 0 '-Fi' '[Y]QF' -nul_match 0 '-F' 'æQ[ð]' -nul_match 0 '-F' '[æ]Qð' -nul_match 0 '-Fi' 'ÆQ[Ð]' -nul_match 0 '-Fi' '[Æ]QÐ' - -# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0 -# patterns case-insensitively. -nul_match T1 '-i' 'ÆQÐ' - -# \0 implicitly disables regexes. This is an undocumented internal -# limitation. -nul_match T1 '' 'yQ[f]' -nul_match T1 '' '[y]Qf' -nul_match T1 '-i' 'YQ[F]' -nul_match T1 '-i' '[Y]Qf' -nul_match T1 '' 'æQ[ð]' -nul_match T1 '' '[æ]Qð' -nul_match T1 '-i' 'ÆQ[Ð]' - -# ... because of \0 implicitly disabling regexes regexes that -# should/shouldn't match don't do the right thing. -nul_match T1 '' 'eQm.*cQ' -nul_match T1 '-i' 'EQM.*cQ' -nul_match T0 '' 'eQm[*]c' -nul_match T0 '-i' 'EQM[*]C' - -# Due to the REG_STARTEND extension when kwset() is disabled on -i & -# non-ASCII the string will be matched in its entirety, but the -# pattern will be cut off at the first \0. -nul_match 0 '-i' 'NOMATCHQð' -nul_match T0 '-i' '[Æ]QNOMATCH' -nul_match T0 '-i' '[æ]QNOMATCH' -# Matches, but for the wrong reasons, just stops at [æ] -nul_match 1 '-i' '[Æ]Qð' -nul_match 1 '-i' '[æ]Qð' - -# Ensure that the matcher doesn't regress to something that stops at -# \0 -nul_match 0 '-F' 'yQ[f]' -nul_match 0 '-Fi' 'YQ[F]' -nul_match 0 '' 'yQNOMATCH' -nul_match 0 '' 'QNOMATCH' -nul_match 0 '-i' 'YQNOMATCH' -nul_match 0 '-i' 'QNOMATCH' -nul_match 0 '-F' 'æQ[ð]' -nul_match 0 '-Fi' 'ÆQ[Ð]' -nul_match 0 '' 'yQNÓMATCH' -nul_match 0 '' 'QNÓMATCH' -nul_match 0 '-i' 'YQNÓMATCH' -nul_match 0 '-i' 'QNÓMATCH' - test_expect_success 'grep respects binary diff attribute' ' echo text >t && git add t && diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh new file mode 100755 index 0000000000..60bab291e4 --- /dev/null +++ b/t/t7816-grep-binary-pattern.sh @@ -0,0 +1,127 @@ +#!/bin/sh + +test_description='git grep with a binary pattern files' + +. ./lib-gettext.sh + +nul_match_internal () { + matches=$1 + prereqs=$2 + lc_all=$3 + extra_flags=$4 + flags=$5 + pattern=$6 + pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g') + + if test "$matches" = 1 + then + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " + printf '$pattern' | q_to_nul >f && + LC_ALL='$lc_all' git grep $extra_flags -f f $flags a + " + elif test "$matches" = 0 + then + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " + >stderr && + printf '$pattern' | q_to_nul >f && + test_must_fail env LC_ALL=\"$lc_all\" git grep $extra_flags -f f $flags a 2>stderr && + test_i18ngrep ! 'This is only supported with -P under PCRE v2' stderr + " + elif test "$matches" = P + then + test_expect_success $prereqs "error, PCRE v2 only: LC_ALL='$lc_all' git grep -f f $flags '$pattern_human' a" " + >stderr && + printf '$pattern' | q_to_nul >f && + test_must_fail env LC_ALL=\"$lc_all\" git grep -f f $flags a 2>stderr && + test_i18ngrep 'This is only supported with -P under PCRE v2' stderr + " + else + test_expect_success "PANIC: Test framework error. Unknown matches value $matches" 'false' + fi +} + +nul_match () { + matches=$1 + matches_pcre2=$2 + matches_pcre2_locale=$3 + flags=$4 + pattern=$5 + pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g') + + nul_match_internal "$matches" "" "C" "" "$flags" "$pattern" + nul_match_internal "$matches_pcre2" "LIBPCRE2" "C" "-P" "$flags" "$pattern" + nul_match_internal "$matches_pcre2_locale" "LIBPCRE2,GETTEXT_LOCALE" "$is_IS_locale" "-P" "$flags" "$pattern" +} + +test_expect_success 'setup' " + echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a && + git add a && + git commit -m. +" + +# Simple fixed-string matching that can use kwset (no -i && non-ASCII) +nul_match P P P '-F' 'yQf' +nul_match P P P '-F' 'yQx' +nul_match P P P '-Fi' 'YQf' +nul_match P P P '-Fi' 'YQx' +nul_match P P 1 '' 'yQf' +nul_match P P 0 '' 'yQx' +nul_match P P 1 '' 'æQð' +nul_match P P P '-F' 'eQm[*]c' +nul_match P P P '-Fi' 'EQM[*]C' + +# Regex patterns that would match but shouldn't with -F +nul_match P P P '-F' 'yQ[f]' +nul_match P P P '-F' '[y]Qf' +nul_match P P P '-Fi' 'YQ[F]' +nul_match P P P '-Fi' '[Y]QF' +nul_match P P P '-F' 'æQ[ð]' +nul_match P P P '-F' '[æ]Qð' + +# The -F kwset codepath can't handle -i && non-ASCII... +nul_match P 1 1 '-i' '[æ]Qð' + +# ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8 +# semantics +nul_match P P P '-Fi' 'ÆQ[Ð]' +nul_match P 0 1 '-i' 'ÆQ[Ð]' +nul_match P 0 1 '-i' '[Æ]QÐ' +nul_match P 0 1 '-i' '[Æ]Qð' +nul_match P 0 1 '-i' 'ÆQÐ' + +# \0 in regexes can only work with -P & PCRE v2 +nul_match P P 1 '' 'yQ[f]' +nul_match P P 1 '' '[y]Qf' +nul_match P P 1 '-i' 'YQ[F]' +nul_match P P 1 '-i' '[Y]Qf' +nul_match P P 1 '' 'æQ[ð]' +nul_match P P 1 '' '[æ]Qð' +nul_match P P 1 '-i' 'ÆQ[Ð]' +nul_match P P 1 '' 'eQm.*cQ' +nul_match P P 1 '-i' 'EQM.*cQ' +nul_match P P 0 '' 'eQm[*]c' +nul_match P P 0 '-i' 'EQM[*]C' + +# Assert that we're using REG_STARTEND and the pattern doesn't match +# just because it's cut off at the first \0. +nul_match P P 0 '-i' 'NOMATCHQð' +nul_match P P 0 '-i' '[Æ]QNOMATCH' +nul_match P P 0 '-i' '[æ]QNOMATCH' + +# Ensure that the matcher doesn't regress to something that stops at +# \0 +nul_match P P P '-F' 'yQ[f]' +nul_match P P P '-Fi' 'YQ[F]' +nul_match P P 0 '' 'yQNOMATCH' +nul_match P P 0 '' 'QNOMATCH' +nul_match P P 0 '-i' 'YQNOMATCH' +nul_match P P 0 '-i' 'QNOMATCH' +nul_match P P P '-F' 'æQ[ð]' +nul_match P P P '-Fi' 'ÆQ[Ð]' +nul_match P P 1 '-i' 'ÆQ[Ð]' +nul_match P P 0 '' 'yQNÓMATCH' +nul_match P P 0 '' 'QNÓMATCH' +nul_match P P 0 '-i' 'YQNÓMATCH' +nul_match P P 0 '-i' 'QNÓMATCH' + +test_done |