diff options
author | Phillip Wood <phillip.wood@dunelm.org.uk> | 2021-05-04 09:27:34 +0000 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2021-05-05 18:53:42 +0900 |
commit | 0324e8fc6b297c9e61745dc4e7d110780334157d (patch) | |
tree | 1fd1960912ae578b9ebd1a26b35d7208ba89befb | |
parent | Git 2.31.1 (diff) | |
download | tgif-0324e8fc6b297c9e61745dc4e7d110780334157d.tar.xz |
word diff: handle zero length matches
If find_word_boundaries() encounters a zero-length match (which can be
caused by matching a newline or using '*' instead of '+' in the regex),
we stop splitting the input into words, which generates an inaccurate
diff. To fix this, increment the start point when there is a zero-length
match and try a new match. This is safe as POSIX regular
expressions always return the longest available match, so a zero-length
match means there are no longer matches available from the current
position.
Commit bf82940dbf1 (color-words: enable REG_NEWLINE to help user,
2009-01-17) prevented matching newlines in negated character classes,
but it is still possible for the user to have an explicit newline
match in the regex, which could cause a zero-length match.
One could argue that having explicit newline matches or using '*'
rather than '+' are user errors, but it seems better to work
round them than to produce inaccurate diffs.
Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- | diff.c | 10 | ||||
-rwxr-xr-x | t/t4034-diff-words.sh | 5 |
2 files changed, 12 insertions, 3 deletions
@@ -2053,7 +2053,7 @@ static void fn_out_diff_words_aux(void *priv, static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, int *begin, int *end) { - if (word_regex && *begin < buffer->size) { + while (word_regex && *begin < buffer->size) { regmatch_t match[1]; if (!regexec_buf(word_regex, buffer->ptr + *begin, buffer->size - *begin, 1, match, 0)) { @@ -2061,9 +2061,13 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, '\n', match[0].rm_eo - match[0].rm_so); *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; *begin += match[0].rm_so; - return *begin >= *end; + if (*begin == *end) + (*begin)++; + else + return *begin > *end; + } else { + return -1; } - return -1; } /* find the next word */ diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index 56f1e62a97..17ceba9f61 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -184,6 +184,11 @@ test_expect_success 'word diff with a regular expression' ' word_diff --color-words="[a-z]+" ' +test_expect_success 'word diff with zero length matches' ' + cp expect.letter-runs-are-words expect && + word_diff --color-words="[a-z${LF}]*" +' + test_expect_success 'set up a diff driver' ' git config diff.testdriver.wordRegex "[^[:space:]]" && cat <<-\EOF >.gitattributes |