diff options
author | Junio C Hamano <gitster@pobox.com> | 2019-10-11 14:24:47 +0900 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2019-10-11 14:24:47 +0900 |
commit | a73f91774cbfb6f31bca328ebf200498fe92d97a (patch) | |
tree | d76ed7e7108565f006967418f33e3d667135ca34 /t/t7816-grep-binary-pattern.sh | |
parent | Merge branch 'pw/rebase-i-show-HEAD-to-reword' (diff) | |
parent | grep: under --debug, show whether PCRE JIT is enabled (diff) | |
download | tgif-a73f91774cbfb6f31bca328ebf200498fe92d97a.tar.xz |
Merge branch 'ab/pcre-jit-fixes'
A few simplification and bugfixes to PCRE interface.
* ab/pcre-jit-fixes:
grep: under --debug, show whether PCRE JIT is enabled
grep: do not enter PCRE2_UTF mode on fixed matching
grep: stess test PCRE v2 on invalid UTF-8 data
grep: create a "is_fixed" member in "grep_pat"
grep: consistently use "p->fixed" in compile_regexp()
grep: stop using a custom JIT stack with PCRE v1
grep: stop "using" a custom JIT stack with PCRE v2
grep: remove overly paranoid BUG(...) code
grep: use PCRE v2 for optimized fixed-string search
grep: remove the kwset optimization
grep: drop support for \0 in --fixed-strings <pattern>
grep: make the behavior for NUL-byte in patterns sane
grep tests: move binary pattern tests into their own file
grep tests: move "grep binary" alongside the rest
grep: inline the return value of a function call used only once
t4210: skip more command-line encoding tests on MinGW
grep: don't use PCRE2?_UTF8 with "log --encoding=<non-utf8>"
log tests: test regex backends in "--encode=<enc>" tests
Diffstat (limited to 't/t7816-grep-binary-pattern.sh')
-rwxr-xr-x | t/t7816-grep-binary-pattern.sh | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh new file mode 100755 index 0000000000..60bab291e4 --- /dev/null +++ b/t/t7816-grep-binary-pattern.sh @@ -0,0 +1,127 @@ +#!/bin/sh + +test_description='git grep with a binary pattern files' + +. ./lib-gettext.sh + +nul_match_internal () { + matches=$1 + prereqs=$2 + lc_all=$3 + extra_flags=$4 + flags=$5 + pattern=$6 + pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g') + + if test "$matches" = 1 + then + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " + printf '$pattern' | q_to_nul >f && + LC_ALL='$lc_all' git grep $extra_flags -f f $flags a + " + elif test "$matches" = 0 + then + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " + >stderr && + printf '$pattern' | q_to_nul >f && + test_must_fail env LC_ALL=\"$lc_all\" git grep $extra_flags -f f $flags a 2>stderr && + test_i18ngrep ! 'This is only supported with -P under PCRE v2' stderr + " + elif test "$matches" = P + then + test_expect_success $prereqs "error, PCRE v2 only: LC_ALL='$lc_all' git grep -f f $flags '$pattern_human' a" " + >stderr && + printf '$pattern' | q_to_nul >f && + test_must_fail env LC_ALL=\"$lc_all\" git grep -f f $flags a 2>stderr && + test_i18ngrep 'This is only supported with -P under PCRE v2' stderr + " + else + test_expect_success "PANIC: Test framework error. Unknown matches value $matches" 'false' + fi +} + +nul_match () { + matches=$1 + matches_pcre2=$2 + matches_pcre2_locale=$3 + flags=$4 + pattern=$5 + pattern_human=$(echo "$pattern" | sed 's/Q/<NUL>/g') + + nul_match_internal "$matches" "" "C" "" "$flags" "$pattern" + nul_match_internal "$matches_pcre2" "LIBPCRE2" "C" "-P" "$flags" "$pattern" + nul_match_internal "$matches_pcre2_locale" "LIBPCRE2,GETTEXT_LOCALE" "$is_IS_locale" "-P" "$flags" "$pattern" +} + +test_expect_success 'setup' " + echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a && + git add a && + git commit -m. +" + +# Simple fixed-string matching that can use kwset (no -i && non-ASCII) +nul_match P P P '-F' 'yQf' +nul_match P P P '-F' 'yQx' +nul_match P P P '-Fi' 'YQf' +nul_match P P P '-Fi' 'YQx' +nul_match P P 1 '' 'yQf' +nul_match P P 0 '' 'yQx' +nul_match P P 1 '' 'æQð' +nul_match P P P '-F' 'eQm[*]c' +nul_match P P P '-Fi' 'EQM[*]C' + +# Regex patterns that would match but shouldn't with -F +nul_match P P P '-F' 'yQ[f]' +nul_match P P P '-F' '[y]Qf' +nul_match P P P '-Fi' 'YQ[F]' +nul_match P P P '-Fi' '[Y]QF' +nul_match P P P '-F' 'æQ[ð]' +nul_match P P P '-F' '[æ]Qð' + +# The -F kwset codepath can't handle -i && non-ASCII... +nul_match P 1 1 '-i' '[æ]Qð' + +# ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8 +# semantics +nul_match P P P '-Fi' 'ÆQ[Ð]' +nul_match P 0 1 '-i' 'ÆQ[Ð]' +nul_match P 0 1 '-i' '[Æ]QÐ' +nul_match P 0 1 '-i' '[Æ]Qð' +nul_match P 0 1 '-i' 'ÆQÐ' + +# \0 in regexes can only work with -P & PCRE v2 +nul_match P P 1 '' 'yQ[f]' +nul_match P P 1 '' '[y]Qf' +nul_match P P 1 '-i' 'YQ[F]' +nul_match P P 1 '-i' '[Y]Qf' +nul_match P P 1 '' 'æQ[ð]' +nul_match P P 1 '' '[æ]Qð' +nul_match P P 1 '-i' 'ÆQ[Ð]' +nul_match P P 1 '' 'eQm.*cQ' +nul_match P P 1 '-i' 'EQM.*cQ' +nul_match P P 0 '' 'eQm[*]c' +nul_match P P 0 '-i' 'EQM[*]C' + +# Assert that we're using REG_STARTEND and the pattern doesn't match +# just because it's cut off at the first \0. +nul_match P P 0 '-i' 'NOMATCHQð' +nul_match P P 0 '-i' '[Æ]QNOMATCH' +nul_match P P 0 '-i' '[æ]QNOMATCH' + +# Ensure that the matcher doesn't regress to something that stops at +# \0 +nul_match P P P '-F' 'yQ[f]' +nul_match P P P '-Fi' 'YQ[F]' +nul_match P P 0 '' 'yQNOMATCH' +nul_match P P 0 '' 'QNOMATCH' +nul_match P P 0 '-i' 'YQNOMATCH' +nul_match P P 0 '-i' 'QNOMATCH' +nul_match P P P '-F' 'æQ[ð]' +nul_match P P P '-Fi' 'ÆQ[Ð]' +nul_match P P 1 '-i' 'ÆQ[Ð]' +nul_match P P 0 '' 'yQNÓMATCH' +nul_match P P 0 '' 'QNÓMATCH' +nul_match P P 0 '-i' 'YQNÓMATCH' +nul_match P P 0 '-i' 'QNÓMATCH' + +test_done |