diff options
Diffstat (limited to 'grep.c')
-rw-r--r-- | grep.c | 349 |
1 files changed, 267 insertions, 82 deletions
@@ -1,4 +1,5 @@ #include "cache.h" +#include "config.h" #include "grep.h" #include "userdiff.h" #include "xdiff-interface.h" @@ -34,10 +35,8 @@ void init_grep_defaults(void) memset(opt, 0, sizeof(*opt)); opt->relative = 1; opt->pathname = 1; - opt->regflags = REG_NEWLINE; opt->max_depth = -1; opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED; - opt->extended_regexp_option = 0; color_set(opt->color_context, ""); color_set(opt->color_filename, ""); color_set(opt->color_function, ""); @@ -78,10 +77,7 @@ int grep_config(const char *var, const char *value, void *cb) return -1; if (!strcmp(var, "grep.extendedregexp")) { - if (git_config_bool(var, value)) - opt->extended_regexp_option = 1; - else - opt->extended_regexp_option = 0; + opt->extended_regexp_option = git_config_bool(var, value); return 0; } @@ -156,7 +152,6 @@ void grep_init(struct grep_opt *opt, const char *prefix) opt->linenum = def->linenum; opt->max_depth = def->max_depth; opt->pathname = def->pathname; - opt->regflags = def->regflags; opt->relative = def->relative; opt->output = def->output; @@ -172,32 +167,51 @@ void grep_init(struct grep_opt *opt, const char *prefix) static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) { + /* + * When committing to the pattern type by setting the relevant + * fields in grep_opt it's generally not necessary to zero out + * the fields we're not choosing, since they won't have been + * set by anything. The extended_regexp_option field is the + * only exception to this. + * + * This is because in the process of parsing grep.patternType + * & grep.extendedRegexp we set opt->pattern_type_option and + * opt->extended_regexp_option, respectively. We then + * internally use opt->extended_regexp_option to see if we're + * compiling an ERE. It must be unset if that's not actually + * the case. 
+ */ + if (pattern_type != GREP_PATTERN_TYPE_ERE && + opt->extended_regexp_option) + opt->extended_regexp_option = 0; + switch (pattern_type) { case GREP_PATTERN_TYPE_UNSPECIFIED: /* fall through */ case GREP_PATTERN_TYPE_BRE: - opt->fixed = 0; - opt->pcre = 0; - opt->regflags &= ~REG_EXTENDED; break; case GREP_PATTERN_TYPE_ERE: - opt->fixed = 0; - opt->pcre = 0; - opt->regflags |= REG_EXTENDED; + opt->extended_regexp_option = 1; break; case GREP_PATTERN_TYPE_FIXED: opt->fixed = 1; - opt->pcre = 0; - opt->regflags &= ~REG_EXTENDED; break; case GREP_PATTERN_TYPE_PCRE: - opt->fixed = 0; - opt->pcre = 1; - opt->regflags &= ~REG_EXTENDED; +#ifdef USE_LIBPCRE2 + opt->pcre2 = 1; +#else + /* + * It's important that pcre1 always be assigned to + * even when there's no USE_LIBPCRE* defined. We still + * call the PCRE stub function, it just dies with + * "cannot use Perl-compatible regexes[...]". + */ + opt->pcre1 = 1; +#endif break; } } @@ -209,6 +223,11 @@ void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_o else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) grep_set_pattern_type_option(opt->pattern_type_option, opt); else if (opt->extended_regexp_option) + /* + * This branch *must* happen after setting from the + * opt->pattern_type_option above, we don't want + * grep.extendedRegexp to override grep.patternType! 
+ */ grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); } @@ -324,8 +343,32 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p, die("%s'%s': %s", where, p->pattern, error); } -#ifdef USE_LIBPCRE -static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +static int is_fixed(const char *s, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (is_regex_special(s[i])) + return 0; + } + + return 1; +} + +static int has_null(const char *s, size_t len) +{ + /* + * regcomp cannot accept patterns with NULs so when using it + * we consider any pattern containing a NUL fixed. + */ + if (memchr(s, 0, len)) + return 1; + + return 0; +} + +#ifdef USE_LIBPCRE1 +static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { const char *error; int erroffset; @@ -333,23 +376,36 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) if (opt->ignore_case) { if (has_non_ascii(p->pattern)) - p->pcre_tables = pcre_maketables(); + p->pcre1_tables = pcre_maketables(); options |= PCRE_CASELESS; } if (is_utf8_locale() && has_non_ascii(p->pattern)) options |= PCRE_UTF8; - p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - p->pcre_tables); - if (!p->pcre_regexp) + p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset, + p->pcre1_tables); + if (!p->pcre1_regexp) compile_regexp_failed(p, error); - p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error); - if (!p->pcre_extra_info && error) + p->pcre1_extra_info = pcre_study(p->pcre1_regexp, PCRE_STUDY_JIT_COMPILE, &error); + if (!p->pcre1_extra_info && error) die("%s", error); + +#ifdef GIT_PCRE1_USE_JIT + pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on); + if (p->pcre1_jit_on == 1) { + p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024); + if (!p->pcre1_jit_stack) + die("Couldn't allocate PCRE JIT stack"); + pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack); + } else if 
(p->pcre1_jit_on != 0) { + die("BUG: The pcre1_jit_on variable should be 0 or 1, not %d", + p->pcre1_jit_on); + } +#endif } -static int pcrematch(struct grep_pat *p, const char *line, const char *eol, +static int pcre1match(struct grep_pat *p, const char *line, const char *eol, regmatch_t *match, int eflags) { int ovector[30], ret, flags = 0; @@ -357,8 +413,19 @@ static int pcrematch(struct grep_pat *p, const char *line, const char *eol, if (eflags & REG_NOTBOL) flags |= PCRE_NOTBOL; - ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line, - 0, flags, ovector, ARRAY_SIZE(ovector)); +#ifdef GIT_PCRE1_USE_JIT + if (p->pcre1_jit_on) { + ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line, + eol - line, 0, flags, ovector, + ARRAY_SIZE(ovector), p->pcre1_jit_stack); + } else +#endif + { + ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, + eol - line, 0, flags, ovector, + ARRAY_SIZE(ovector)); + } + if (ret < 0 && ret != PCRE_ERROR_NOMATCH) die("pcre_exec failed with error code %d", ret); if (ret > 0) { @@ -370,55 +437,165 @@ static int pcrematch(struct grep_pat *p, const char *line, const char *eol, return ret; } -static void free_pcre_regexp(struct grep_pat *p) +static void free_pcre1_regexp(struct grep_pat *p) { - pcre_free(p->pcre_regexp); - pcre_free(p->pcre_extra_info); - pcre_free((void *)p->pcre_tables); + pcre_free(p->pcre1_regexp); +#ifdef GIT_PCRE1_USE_JIT + if (p->pcre1_jit_on) { + pcre_free_study(p->pcre1_extra_info); + pcre_jit_stack_free(p->pcre1_jit_stack); + } else +#endif + { + pcre_free(p->pcre1_extra_info); + } + pcre_free((void *)p->pcre1_tables); } -#else /* !USE_LIBPCRE */ -static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +#else /* !USE_LIBPCRE1 */ +static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); } -static int pcrematch(struct grep_pat *p, const char *line, const char 
*eol, +static int pcre1match(struct grep_pat *p, const char *line, const char *eol, regmatch_t *match, int eflags) { return 1; } -static void free_pcre_regexp(struct grep_pat *p) +static void free_pcre1_regexp(struct grep_pat *p) { } -#endif /* !USE_LIBPCRE */ +#endif /* !USE_LIBPCRE1 */ -static int is_fixed(const char *s, size_t len) +#ifdef USE_LIBPCRE2 +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) { - size_t i; + int error; + PCRE2_UCHAR errbuf[256]; + PCRE2_SIZE erroffset; + int options = PCRE2_MULTILINE; + const uint8_t *character_tables = NULL; + int jitret; - /* regcomp cannot accept patterns with NULs so we - * consider any pattern containing a NUL fixed. - */ - if (memchr(s, 0, len)) - return 1; + assert(opt->pcre2); - for (i = 0; i < len; i++) { - if (is_regex_special(s[i])) - return 0; + p->pcre2_compile_context = NULL; + + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) { + character_tables = pcre2_maketables(NULL); + p->pcre2_compile_context = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(p->pcre2_compile_context, character_tables); + } + options |= PCRE2_CASELESS; + } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE2_UTF; + + p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, + p->patternlen, options, &error, &erroffset, + p->pcre2_compile_context); + + if (p->pcre2_pattern) { + p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL); + if (!p->pcre2_match_data) + die("Couldn't allocate PCRE2 match data"); + } else { + pcre2_get_error_message(error, errbuf, sizeof(errbuf)); + compile_regexp_failed(p, (const char *)&errbuf); + } + + pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); + if (p->pcre2_jit_on == 1) { + jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); + if (jitret) + die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret); + p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, 
NULL); + if (!p->pcre2_jit_stack) + die("Couldn't allocate PCRE2 JIT stack"); + p->pcre2_match_context = pcre2_match_context_create(NULL); + if (!p->pcre2_match_context) + die("Couldn't allocate PCRE2 match context"); + pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); + } else if (p->pcre2_jit_on != 0) { + die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d", + p->pcre2_jit_on); } +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + int ret, flags = 0; + PCRE2_SIZE *ovector; + PCRE2_UCHAR errbuf[256]; + + if (eflags & REG_NOTBOL) + flags |= PCRE2_NOTBOL; + + if (p->pcre2_jit_on) + ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + else + ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) { + pcre2_get_error_message(ret, errbuf, sizeof(errbuf)); + die("%s failed with error code %d: %s", + (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret, + errbuf); + } + if (ret > 0) { + ovector = pcre2_get_ovector_pointer(p->pcre2_match_data); + ret = 0; + match->rm_so = (int)ovector[0]; + match->rm_eo = (int)ovector[1]; + } + + return ret; +} + +static void free_pcre2_pattern(struct grep_pat *p) +{ + pcre2_compile_context_free(p->pcre2_compile_context); + pcre2_code_free(p->pcre2_pattern); + pcre2_match_data_free(p->pcre2_match_data); + pcre2_jit_stack_free(p->pcre2_jit_stack); + pcre2_match_context_free(p->pcre2_match_context); +} +#else /* !USE_LIBPCRE2 */ +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) +{ + /* + * Unreachable until USE_LIBPCRE2 becomes synonymous with + * USE_LIBPCRE. See the sibling comment in + * grep_set_pattern_type_option(). 
+ */ + die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ return 1; } +static void free_pcre2_pattern(struct grep_pat *p) +{ +} +#endif /* !USE_LIBPCRE2 */ + static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { struct strbuf sb = STRBUF_INIT; int err; - int regflags; + int regflags = 0; basic_regex_quote_buf(&sb, p->pattern); - regflags = opt->regflags & ~REG_EXTENDED; if (opt->ignore_case) regflags |= REG_ICASE; err = regcomp(&p->regexp, sb.buf, regflags); @@ -435,12 +612,12 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int icase, ascii_only; + int ascii_only; int err; + int regflags = REG_NEWLINE; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - icase = opt->regflags & REG_ICASE || p->ignore_case; ascii_only = !has_non_ascii(p->pattern); /* @@ -455,13 +632,13 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) * simple string match using kws. p->fixed tells us if we * want to use kws. */ - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) - p->fixed = !icase || ascii_only; - else - p->fixed = 0; + if (opt->fixed || + has_null(p->pattern, p->patternlen) || + is_fixed(p->pattern, p->patternlen)) + p->fixed = !p->ignore_case || ascii_only; if (p->fixed) { - p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); + p->kws = kwsalloc(p->ignore_case ? 
tolower_trans_tbl : NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; @@ -475,12 +652,21 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } - if (opt->pcre) { - compile_pcre_regexp(p, opt); + if (opt->pcre2) { + compile_pcre2_pattern(p, opt); return; } - err = regcomp(&p->regexp, p->pattern, opt->regflags); + if (opt->pcre1) { + compile_pcre1_regexp(p, opt); + return; + } + + if (p->ignore_case) + regflags |= REG_ICASE; + if (opt->extended_regexp_option) + regflags |= REG_EXTENDED; + err = regcomp(&p->regexp, p->pattern, regflags); if (err) { char errbuf[1024]; regerror(err, &p->regexp, errbuf, 1024); @@ -832,8 +1018,10 @@ void free_grep_patterns(struct grep_opt *opt) case GREP_PATTERN_BODY: if (p->kws) kwsfree(p->kws); - else if (p->pcre_regexp) - free_pcre_regexp(p); + else if (p->pcre1_regexp) + free_pcre1_regexp(p); + else if (p->pcre2_pattern) + free_pcre2_pattern(p); else regfree(&p->regexp); free(p->pattern); @@ -912,8 +1100,10 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, if (p->fixed) hit = !fixmatch(p, line, eol, match); - else if (p->pcre_regexp) - hit = !pcrematch(p, line, eol, match, eflags); + else if (p->pcre1_regexp) + hit = !pcre1match(p, line, eol, match, eflags); + else if (p->pcre2_pattern) + hit = !pcre2match(p, line, eol, match, eflags); else hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, eflags); @@ -1403,11 +1593,11 @@ static int fill_textconv_grep(struct userdiff_driver *driver, */ df = alloc_filespec(gs->path); switch (gs->type) { - case GREP_SOURCE_SHA1: + case GREP_SOURCE_OID: fill_filespec(df, gs->identifier, 1, 0100644); break; case GREP_SOURCE_FILE: - fill_filespec(df, null_sha1, 0, 0100644); + fill_filespec(df, &null_oid, 0, 0100644); break; default: die("BUG: attempt to textconv something without a path?"); @@ -1747,9 +1937,8 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type, * If the identifier is non-NULL (in the 
submodule case) it * will be a SHA1 that needs to be copied. */ - case GREP_SOURCE_SHA1: - gs->identifier = xmalloc(20); - hashcpy(gs->identifier, identifier); + case GREP_SOURCE_OID: + gs->identifier = oiddup(identifier); break; case GREP_SOURCE_BUF: gs->identifier = NULL; @@ -1759,12 +1948,9 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type, void grep_source_clear(struct grep_source *gs) { - free(gs->name); - gs->name = NULL; - free(gs->path); - gs->path = NULL; - free(gs->identifier); - gs->identifier = NULL; + FREE_AND_NULL(gs->name); + FREE_AND_NULL(gs->path); + FREE_AND_NULL(gs->identifier); grep_source_clear_data(gs); } @@ -1772,10 +1958,9 @@ void grep_source_clear_data(struct grep_source *gs) { switch (gs->type) { case GREP_SOURCE_FILE: - case GREP_SOURCE_SHA1: + case GREP_SOURCE_OID: case GREP_SOURCE_SUBMODULE: - free(gs->buf); - gs->buf = NULL; + FREE_AND_NULL(gs->buf); gs->size = 0; break; case GREP_SOURCE_BUF: @@ -1784,7 +1969,7 @@ void grep_source_clear_data(struct grep_source *gs) } } -static int grep_source_load_sha1(struct grep_source *gs) +static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; @@ -1795,7 +1980,7 @@ static int grep_source_load_sha1(struct grep_source *gs) if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, - sha1_to_hex(gs->identifier)); + oid_to_hex(gs->identifier)); return 0; } @@ -1841,8 +2026,8 @@ static int grep_source_load(struct grep_source *gs) switch (gs->type) { case GREP_SOURCE_FILE: return grep_source_load_file(gs); - case GREP_SOURCE_SHA1: - return grep_source_load_sha1(gs); + case GREP_SOURCE_OID: + return grep_source_load_oid(gs); case GREP_SOURCE_BUF: return gs->buf ? 0 : -1; case GREP_SOURCE_SUBMODULE: |