diff options
Diffstat (limited to 'grep.c')
-rw-r--r-- | grep.c | 108 |
1 files changed, 74 insertions, 34 deletions
@@ -4,6 +4,8 @@ #include "xdiff-interface.h" #include "diff.h" #include "diffcore.h" +#include "commit.h" +#include "quote.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@ -161,17 +163,7 @@ void grep_init(struct grep_opt *opt, const char *prefix) color_set(opt->color_sep, def->color_sep); } -void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt) -{ - if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(pattern_type, opt); - else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(opt->pattern_type_option, opt); - else if (opt->extended_regexp_option) - grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); -} - -void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) +static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) { switch (pattern_type) { case GREP_PATTERN_TYPE_UNSPECIFIED: @@ -203,6 +195,16 @@ void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct gr } } +void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt) +{ + if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED) + grep_set_pattern_type_option(pattern_type, opt); + else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) + grep_set_pattern_type_option(opt->pattern_type_option, opt); + else if (opt->extended_regexp_option) + grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); +} + static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t, @@ -322,11 +324,16 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) int erroffset; int options = PCRE_MULTILINE; - if (opt->ignore_case) + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) + p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; + } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE_UTF8; p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - NULL); + p->pcre_tables); if (!p->pcre_regexp) compile_regexp_failed(p, error); @@ -360,6 +367,7 @@ static void free_pcre_regexp(struct grep_pat *p) { pcre_free(p->pcre_regexp); pcre_free(p->pcre_extra_info); + pcre_free((void *)p->pcre_tables); } #else /* !USE_LIBPCRE */ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) @@ -396,26 +404,68 @@ static int is_fixed(const char *s, size_t len) return 1; } +static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) +{ + struct strbuf sb = STRBUF_INIT; + int err; + int regflags; + + basic_regex_quote_buf(&sb, p->pattern); + regflags = opt->regflags & ~REG_EXTENDED; + if (opt->ignore_case) + regflags |= REG_ICASE; + err = regcomp(&p->regexp, sb.buf, regflags); + if (opt->debug) + fprintf(stderr, "fixed %s\n", sb.buf); + strbuf_release(&sb); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, sizeof(errbuf)); + regfree(&p->regexp); + compile_regexp_failed(p, errbuf); + } +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int icase, ascii_only; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + icase = opt->regflags & REG_ICASE || p->ignore_case; + ascii_only = !has_non_ascii(p->pattern); + /* + * Even when -F (fixed) asks us to do a non-regexp search, we + * may not be able to correctly case-fold when -i + * (ignore-case) is asked (in which case, we'll synthesize a + * regexp to match the pattern that matches regexp special + * characters literally, while ignoring case differences). On + * the other hand, even without -F, if the pattern does not + * have any regexp special characters and there is no need for + * case-folding search, we can internally turn it into a + * simple string match using kws. p->fixed tells us if we + * want to use kws. + */ if (opt->fixed || is_fixed(p->pattern, p->patternlen)) - p->fixed = 1; + p->fixed = !icase || ascii_only; else p->fixed = 0; if (p->fixed) { - if (opt->regflags & REG_ICASE || p->ignore_case) - p->kws = kwsalloc(tolower_trans_tbl); - else - p->kws = kwsalloc(NULL); + p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; + } else if (opt->fixed) { + /* + * We come here when the pattern has the non-ascii + * characters we cannot case-fold, and asked to + * ignore-case. + */ + compile_fixed_regexp(p, opt); + return; } if (opt->pcre) { @@ -643,10 +693,10 @@ static struct grep_expr *prep_header_patterns(struct grep_opt *opt) for (p = opt->header_list; p; p = p->next) { if (p->token != GREP_PATTERN_HEAD) - die("bug: a non-header pattern in grep header list."); + die("BUG: a non-header pattern in grep header list."); if (p->field < GREP_HEADER_FIELD_MIN || GREP_HEADER_FIELD_MAX <= p->field) - die("bug: unknown header field %d", p->field); + die("BUG: unknown header field %d", p->field); compile_regexp(p, opt); } @@ -659,7 +709,7 @@ static struct grep_expr *prep_header_patterns(struct grep_opt *opt) h = compile_pattern_atom(&pp); if (!h || pp != p->next) - die("bug: malformed header expr"); + die("BUG: malformed header expr"); if (!header_group[p->field]) { header_group[p->field] = h; continue; @@ -848,17 +898,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol, } } -static int regmatch(const regex_t *preg, char *line, char *eol, - regmatch_t *match, int eflags) -{ -#ifdef REG_STARTEND - match->rm_so = 0; - match->rm_eo = eol - line; - eflags |= REG_STARTEND; -#endif - return regexec(preg, line, 1, match, eflags); -} - static int patmatch(struct grep_pat *p, char *line, char *eol, regmatch_t *match, int eflags) { @@ -869,7 +908,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, else if (p->pcre_regexp) hit = !pcrematch(p, line, eol, match, eflags); else - hit = !regmatch(&p->regexp, line, eol, match, eflags); + hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, + eflags); return hit; } @@ -1464,7 +1504,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle case GREP_BINARY_TEXT: break; default: - die("bug: unknown binary handling mode"); + die("BUG: unknown binary handling mode"); } } |