From aca20dd558338446336934a4b18516cfbf7d8393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:26:39 +0200 Subject: grep: add test script for binary file handling Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100755 t/t7008-grep-binary.sh (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh new file mode 100755 index 0000000000..2320e74b69 --- /dev/null +++ b/t/t7008-grep-binary.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +test_description='git grep in binary files' + +. ./test-lib.sh + +test_expect_success 'setup' " + printf 'binary\000file\n' >a && + git add a && + git commit -m. +" + +test_expect_success 'git grep ina a' ' + echo Binary file a matches >expect && + git grep ina a >actual && + test_cmp expect actual +' + +test_expect_success 'git grep -ah ina a' ' + git grep -ah ina a >actual && + test_cmp a actual +' + +test_expect_success 'git grep -I ina a' ' + : >expect && + test_must_fail git grep -I ina a >actual && + test_cmp expect actual +' + +test_expect_success 'git grep -L bar a' ' + echo a >expect && + git grep -L bar a >actual && + test_cmp expect actual +' + +test_expect_success 'git grep -q ina a' ' + : >expect && + git grep -q ina a >actual && + test_cmp expect actual +' + +test_done -- cgit v1.2.3 From c30c10cff1d640ce119596b907c10cc11f83358d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:29:35 +0200 Subject: grep: --count over binary The intent of showing the message "Binary file xyz matches" for binary files is to avoid annoying users by potentially messing up their terminals by printing control characters. In --count mode, this precaution isn't necessary. Display counts of matches if -c/--count was specified, even if -a was not given. GNU grep does the same. 
Moving the check for ->count before the code for handling binary files also avoids printing context lines if --count and -[ABC] were used together, so we can remove the part of the comment that mentions this behaviour. Again, GNU grep does the same. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 2320e74b69..91970eacd6 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -27,6 +27,12 @@ test_expect_success 'git grep -I ina a' ' test_cmp expect actual ' +test_expect_success 'git grep -c ina a' ' + echo a:1 >expect && + git grep -c ina a >actual && + test_cmp expect actual +' + test_expect_success 'git grep -L bar a' ' echo a >expect && git grep -L bar a >actual && -- cgit v1.2.3 From 321ffcc0556a94c461ac84667b35494c193804ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:30:48 +0200 Subject: grep: --name-only over binary As with the option -c/--count, git grep with the option -l/--name-only should work the same with binary files as with text files because there is no danger of messing up the terminal with control characters from the contents of matching files. GNU grep does the same. Move the check for ->name_only before the one for binary_match_only, thus making the latter irrelevant for git grep -l. 
Reported-by: Dmitry Potapov Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 91970eacd6..4a12d97922 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -33,6 +33,12 @@ test_expect_success 'git grep -c ina a' ' test_cmp expect actual ' +test_expect_success 'git grep -l ina a' ' + echo a >expect && + git grep -l ina a >actual && + test_cmp expect actual +' + test_expect_success 'git grep -L bar a' ' echo a >expect && git grep -L bar a >actual && -- cgit v1.2.3 From 1baddf4b3781c0c714442adfda496d667e1850cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:32:43 +0200 Subject: grep: use memmem() for fixed string search Allow searching beyond NUL characters by using memmem() instead of strstr(). Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 4 ++++ 1 file changed, 4 insertions(+) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 4a12d97922..9adc9ed6fe 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -51,4 +51,8 @@ test_expect_success 'git grep -q ina a' ' test_cmp expect actual ' +test_expect_success 'git grep -F ile a' ' + git grep -F ile a +' + test_done -- cgit v1.2.3 From 52d799a79f921cc47823a0455b0e646636410b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:34:06 +0200 Subject: grep: continue case insensitive fixed string search after NUL chars Functions for C strings, like strcasestr(), can't see beyond NUL characters. Check if there is such an obstacle on the line and try again behind it. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 4 ++++ 1 file changed, 4 insertions(+) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 9adc9ed6fe..9660842c44 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -55,4 +55,8 @@ test_expect_success 'git grep -F ile a' ' git grep -F ile a ' +test_expect_success 'git grep -Fi iLE a' ' + git grep -Fi iLE a +' + test_done -- cgit v1.2.3 From f96e56733ab3e3ce5c79c27c673c746af1519a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:35:07 +0200 Subject: grep: use REG_STARTEND for all matching if available Refactor REG_STARTEND handling in look_ahead() into a new helper, regmatch(), and use it for line matching, too. This allows regex matching beyond NUL characters if regexec() supports the flag. NUL characters themselves are not matched in any way, though. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 9660842c44..4f5e74fed7 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -59,4 +59,14 @@ test_expect_success 'git grep -Fi iLE a' ' git grep -Fi iLE a ' +# This test actually passes on platforms where regexec() supports the +# flag REG_STARTEND. +test_expect_failure 'git grep ile a' ' + git grep ile a +' + +test_expect_failure 'git grep .fi a' ' + git grep .fi a +' + test_done -- cgit v1.2.3 From ed40a0951cedb70777669144478166aa5bb2cf9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:43:43 +0200 Subject: grep: support NUL chars in search strings for -F Search patterns in a file specified with -f can contain NUL characters. The current code ignores all characters on a line after a NUL. 
Pass the actual length of the line all the way from the pattern file to fixmatch() and use it for case-sensitive fixed string matching. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 4f5e74fed7..eb8ca88cce 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -69,4 +69,34 @@ test_expect_failure 'git grep .fi a' ' git grep .fi a ' +test_expect_success 'git grep -F yf a' " + printf 'y\000f' >f && + git grep -f f -F a +" + +test_expect_success 'git grep -F yx a' " + printf 'y\000x' >f && + test_must_fail git grep -f f -F a +" + +test_expect_success 'git grep -Fi Yf a' " + printf 'Y\000f' >f && + git grep -f f -Fi a +" + +test_expect_failure 'git grep -Fi Yx a' " + printf 'Y\000x' >f && + test_must_fail git grep -f f -Fi a +" + +test_expect_success 'git grep yf a' " + printf 'y\000f' >f && + git grep -f f a +" + +test_expect_failure 'git grep yx a' " + printf 'y\000x' >f && + test_must_fail git grep -f f a +" + test_done -- cgit v1.2.3 From 7e36de5859afd77976b7583b2012c238b1ffbb7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Tue, 17 Aug 2010 09:24:41 +0000 Subject: t/t7008-grep-binary.sh: un-TODO a test that needs REG_STARTEND MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have a regex engine that supports REG_STARTEND this test should fail if "git grep" can't grep NUL characters. Platforms that don't have a POSIX regex engine which supports REG_STARTEND should always define NO_REGEX=YesPlease when compiling. 
Signed-off-by: Ævar Arnfjörð Bjarmason Acked-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index eb8ca88cce..c0f9f3f705 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -61,7 +61,7 @@ test_expect_success 'git grep -Fi iLE a' ' # This test actually passes on platforms where regexec() supports the # flag REG_STARTEND. -test_expect_failure 'git grep ile a' ' +test_expect_success 'git grep ile a' ' git grep ile a ' -- cgit v1.2.3 From f98548764ea0baf7490b76782e323f90a941cc74 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Thu, 9 Sep 2010 14:15:57 -0500 Subject: t/t7008: workaround broken handling of \000 by printf on IRIX On IRIX 6.5, the printf utility in /usr/bin does not appear to handle the \ddd notation according to POSIX. This printf appears to halt processing of the string argument and ignore any additional characters in the string. Work around this flaw by replacing the \000's with 'Q' and using the q_to_nul helper function provided by test-lib.sh. This problem with printf is not apparent when using the Bash shell since Bash implements a POSIX compatible printf function internally. Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index c0f9f3f705..e058d184d1 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -5,7 +5,7 @@ test_description='git grep in binary files' . ./test-lib.sh test_expect_success 'setup' " - printf 'binary\000file\n' >a && + echo 'binaryQfile' | q_to_nul >a && git add a && git commit -m. 
" @@ -70,32 +70,32 @@ test_expect_failure 'git grep .fi a' ' ' test_expect_success 'git grep -F yf a' " - printf 'y\000f' >f && + printf 'yQf' | q_to_nul >f && git grep -f f -F a " test_expect_success 'git grep -F yx a' " - printf 'y\000x' >f && + printf 'yQx' | q_to_nul >f && test_must_fail git grep -f f -F a " test_expect_success 'git grep -Fi Yf a' " - printf 'Y\000f' >f && + printf 'YQf' | q_to_nul >f && git grep -f f -Fi a " test_expect_failure 'git grep -Fi Yx a' " - printf 'Y\000x' >f && + printf 'YQx' | q_to_nul >f && test_must_fail git grep -f f -Fi a " test_expect_success 'git grep yf a' " - printf 'y\000f' >f && + printf 'yQf' | q_to_nul >f && git grep -f f a " test_expect_failure 'git grep yx a' " - printf 'y\000x' >f && + printf 'yQx' | q_to_nul >f && test_must_fail git grep -f f a " -- cgit v1.2.3 From 9eceddeec6ccdbcb0c6ce02a45357ffd5a428f39 Mon Sep 17 00:00:00 2001 From: Fredrik Kuivinen Date: Sun, 21 Aug 2011 00:42:18 +0200 Subject: Use kwset in grep Benchmarks for the hot cache case: before: $ perf stat --repeat=5 git grep qwerty > /dev/null Performance counter stats for 'git grep qwerty' (5 runs): 3,478,085 cache-misses # 2.322 M/sec ( +- 2.690% ) 11,356,177 cache-references # 7.582 M/sec ( +- 2.598% ) 3,872,184 branch-misses # 0.363 % ( +- 0.258% ) 1,067,367,848 branches # 712.673 M/sec ( +- 2.622% ) 3,828,370,782 instructions # 0.947 IPC ( +- 0.033% ) 4,043,832,831 cycles # 2700.037 M/sec ( +- 0.167% ) 8,518 page-faults # 0.006 M/sec ( +- 3.648% ) 847 CPU-migrations # 0.001 M/sec ( +- 3.262% ) 6,546 context-switches # 0.004 M/sec ( +- 2.292% ) 1497.695495 task-clock-msecs # 3.303 CPUs ( +- 2.550% ) 0.453394396 seconds time elapsed ( +- 0.912% ) after: $ perf stat --repeat=5 git grep qwerty > /dev/null Performance counter stats for 'git grep qwerty' (5 runs): 2,989,918 cache-misses # 3.166 M/sec ( +- 5.013% ) 10,986,041 cache-references # 11.633 M/sec ( +- 4.899% ) (scaled from 95.06%) 3,511,993 branch-misses # 1.422 % ( +- 0.785% ) 246,893,561 
branches # 261.433 M/sec ( +- 3.967% ) 1,392,727,757 instructions # 0.564 IPC ( +- 0.040% ) 2,468,142,397 cycles # 2613.494 M/sec ( +- 0.110% ) 7,747 page-faults # 0.008 M/sec ( +- 3.995% ) 897 CPU-migrations # 0.001 M/sec ( +- 2.383% ) 6,535 context-switches # 0.007 M/sec ( +- 1.993% ) 944.384228 task-clock-msecs # 3.177 CPUs ( +- 0.268% ) 0.297257643 seconds time elapsed ( +- 0.450% ) So we gain about 35% by using the kwset code. As a side effect of using kwset two grep tests are fixed by this patch. The first is fixed because kwset can deal with case-insensitive search containing NULs, something strcasestr cannot do. The second one is fixed because we consider patterns containing NULs as fixed strings (regcomp cannot accept patterns with NULs). Signed-off-by: Fredrik Kuivinen Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 't/t7008-grep-binary.sh') diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index e058d184d1..917a264eea 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -84,7 +84,7 @@ test_expect_success 'git grep -Fi Yf a' " git grep -f f -Fi a " -test_expect_failure 'git grep -Fi Yx a' " +test_expect_success 'git grep -Fi Yx a' " printf 'YQx' | q_to_nul >f && test_must_fail git grep -f f -Fi a " @@ -94,7 +94,7 @@ test_expect_success 'git grep yf a' " git grep -f f a " -test_expect_failure 'git grep yx a' " +test_expect_success 'git grep yx a' " printf 'yQx' | q_to_nul >f && test_must_fail git grep -f f a " -- cgit v1.2.3