diff options
Diffstat (limited to 'grep.c')
-rw-r--r-- | grep.c | 292 |
1 files changed, 236 insertions, 56 deletions
@@ -179,26 +179,38 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st case GREP_PATTERN_TYPE_BRE: opt->fixed = 0; - opt->pcre = 0; - opt->regflags &= ~REG_EXTENDED; + opt->pcre1 = 0; + opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_ERE: opt->fixed = 0; - opt->pcre = 0; + opt->pcre1 = 0; + opt->pcre2 = 0; opt->regflags |= REG_EXTENDED; break; case GREP_PATTERN_TYPE_FIXED: opt->fixed = 1; - opt->pcre = 0; - opt->regflags &= ~REG_EXTENDED; + opt->pcre1 = 0; + opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_PCRE: opt->fixed = 0; - opt->pcre = 1; - opt->regflags &= ~REG_EXTENDED; +#ifdef USE_LIBPCRE2 + opt->pcre1 = 0; + opt->pcre2 = 1; +#else + /* + * It's important that pcre1 always be assigned to + * even when there's no USE_LIBPCRE* defined. We still + * call the PCRE stub function, it just dies with + * "cannot use Perl-compatible regexes[...]". + */ + opt->pcre1 = 1; + opt->pcre2 = 0; +#endif break; } } @@ -325,8 +337,32 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p, die("%s'%s': %s", where, p->pattern, error); } -#ifdef USE_LIBPCRE -static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +static int is_fixed(const char *s, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (is_regex_special(s[i])) + return 0; + } + + return 1; +} + +static int has_null(const char *s, size_t len) +{ + /* + * regcomp cannot accept patterns with NULs so when using it + * we consider any pattern containing a NUL fixed. + */ + if (memchr(s, 0, len)) + return 1; + + return 0; +} + +#ifdef USE_LIBPCRE1 +static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { const char *error; int erroffset; @@ -334,23 +370,36 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) if (opt->ignore_case) { if (has_non_ascii(p->pattern)) - p->pcre_tables = pcre_maketables(); + p->pcre1_tables = pcre_maketables(); options |= PCRE_CASELESS; } if (is_utf8_locale() && has_non_ascii(p->pattern)) options |= PCRE_UTF8; - p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - p->pcre_tables); - if (!p->pcre_regexp) + p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset, + p->pcre1_tables); + if (!p->pcre1_regexp) compile_regexp_failed(p, error); - p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error); - if (!p->pcre_extra_info && error) + p->pcre1_extra_info = pcre_study(p->pcre1_regexp, PCRE_STUDY_JIT_COMPILE, &error); + if (!p->pcre1_extra_info && error) die("%s", error); + +#ifdef GIT_PCRE1_USE_JIT + pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on); + if (p->pcre1_jit_on == 1) { + p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024); + if (!p->pcre1_jit_stack) + die("Couldn't allocate PCRE JIT stack"); + pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack); + } else if (p->pcre1_jit_on != 0) { + die("BUG: The pcre1_jit_on variable should be 0 or 1, not %d", + p->pcre1_jit_on); + } +#endif } -static int pcrematch(struct grep_pat *p, const char *line, const char *eol, +static int pcre1match(struct grep_pat *p, const char *line, const char *eol, regmatch_t *match, int eflags) { int ovector[30], ret, flags = 0; @@ -358,8 +407,19 @@ static int pcrematch(struct grep_pat *p, const char *line, const char *eol, if (eflags & REG_NOTBOL) flags |= PCRE_NOTBOL; - ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line, - 0, flags, ovector, ARRAY_SIZE(ovector)); +#ifdef GIT_PCRE1_USE_JIT + if (p->pcre1_jit_on) { + ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line, + eol - line, 0, flags, ovector, + ARRAY_SIZE(ovector), p->pcre1_jit_stack); + } else +#endif + { + ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, + eol - line, 0, flags, ovector, + ARRAY_SIZE(ovector)); + } + if (ret < 0 && ret != PCRE_ERROR_NOMATCH) die("pcre_exec failed with error code %d", ret); if (ret > 0) { @@ -371,55 +431,165 @@ static int pcrematch(struct grep_pat *p, const char *line, const char *eol, return ret; } -static void free_pcre_regexp(struct grep_pat *p) +static void free_pcre1_regexp(struct grep_pat *p) { - pcre_free(p->pcre_regexp); - pcre_free(p->pcre_extra_info); - pcre_free((void *)p->pcre_tables); + pcre_free(p->pcre1_regexp); +#ifdef GIT_PCRE1_USE_JIT + if (p->pcre1_jit_on) { + pcre_free_study(p->pcre1_extra_info); + pcre_jit_stack_free(p->pcre1_jit_stack); + } else +#endif + { + pcre_free(p->pcre1_extra_info); + } + pcre_free((void *)p->pcre1_tables); } -#else /* !USE_LIBPCRE */ -static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) +#else /* !USE_LIBPCRE1 */ +static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); } -static int pcrematch(struct grep_pat *p, const char *line, const char *eol, +static int pcre1match(struct grep_pat *p, const char *line, const char *eol, regmatch_t *match, int eflags) { return 1; } -static void free_pcre_regexp(struct grep_pat *p) +static void free_pcre1_regexp(struct grep_pat *p) { } -#endif /* !USE_LIBPCRE */ +#endif /* !USE_LIBPCRE1 */ -static int is_fixed(const char *s, size_t len) +#ifdef USE_LIBPCRE2 +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) { - size_t i; + int error; + PCRE2_UCHAR errbuf[256]; + PCRE2_SIZE erroffset; + int options = PCRE2_MULTILINE; + const uint8_t *character_tables = NULL; + int jitret; - /* regcomp cannot accept patterns with NULs so we - * consider any pattern containing a NUL fixed. - */ - if (memchr(s, 0, len)) - return 1; + assert(opt->pcre2); - for (i = 0; i < len; i++) { - if (is_regex_special(s[i])) - return 0; + p->pcre2_compile_context = NULL; + + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) { + character_tables = pcre2_maketables(NULL); + p->pcre2_compile_context = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(p->pcre2_compile_context, character_tables); + } + options |= PCRE2_CASELESS; + } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE2_UTF; + + p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, + p->patternlen, options, &error, &erroffset, + p->pcre2_compile_context); + + if (p->pcre2_pattern) { + p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL); + if (!p->pcre2_match_data) + die("Couldn't allocate PCRE2 match data"); + } else { + pcre2_get_error_message(error, errbuf, sizeof(errbuf)); + compile_regexp_failed(p, (const char *)&errbuf); + } + + pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); + if (p->pcre2_jit_on == 1) { + jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); + if (jitret) + die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret); + p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL); + if (!p->pcre2_jit_stack) + die("Couldn't allocate PCRE2 JIT stack"); + p->pcre2_match_context = pcre2_match_context_create(NULL); + if (!p->pcre2_match_context) + die("Couldn't allocate PCRE2 match context"); + pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); + } else if (p->pcre2_jit_on != 0) { + die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d", + p->pcre1_jit_on); } +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + int ret, flags = 0; + PCRE2_SIZE *ovector; + PCRE2_UCHAR errbuf[256]; + + if (eflags & REG_NOTBOL) + flags |= PCRE2_NOTBOL; + if (p->pcre2_jit_on) + ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + else + ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + + if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) { + pcre2_get_error_message(ret, errbuf, sizeof(errbuf)); + die("%s failed with error code %d: %s", + (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret, + errbuf); + } + if (ret > 0) { + ovector = pcre2_get_ovector_pointer(p->pcre2_match_data); + ret = 0; + match->rm_so = (int)ovector[0]; + match->rm_eo = (int)ovector[1]; + } + + return ret; +} + +static void free_pcre2_pattern(struct grep_pat *p) +{ + pcre2_compile_context_free(p->pcre2_compile_context); + pcre2_code_free(p->pcre2_pattern); + pcre2_match_data_free(p->pcre2_match_data); + pcre2_jit_stack_free(p->pcre2_jit_stack); + pcre2_match_context_free(p->pcre2_match_context); +} +#else /* !USE_LIBPCRE2 */ +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) +{ + /* + * Unreachable until USE_LIBPCRE2 becomes synonymous with + * USE_LIBPCRE. See the sibling comment in + * grep_set_pattern_type_option(). + */ + die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ return 1; } +static void free_pcre2_pattern(struct grep_pat *p) +{ +} +#endif /* !USE_LIBPCRE2 */ + static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { struct strbuf sb = STRBUF_INIT; int err; - int regflags; + int regflags = opt->regflags; basic_regex_quote_buf(&sb, p->pattern); - regflags = opt->regflags & ~REG_EXTENDED; if (opt->ignore_case) regflags |= REG_ICASE; err = regcomp(&p->regexp, sb.buf, regflags); @@ -456,7 +626,9 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) * simple string match using kws. p->fixed tells us if we * want to use kws. */ - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) + if (opt->fixed || + has_null(p->pattern, p->patternlen) || + is_fixed(p->pattern, p->patternlen)) p->fixed = !icase || ascii_only; else p->fixed = 0; @@ -476,8 +648,13 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } - if (opt->pcre) { - compile_pcre_regexp(p, opt); + if (opt->pcre2) { + compile_pcre2_pattern(p, opt); + return; + } + + if (opt->pcre1) { + compile_pcre1_regexp(p, opt); return; } @@ -833,8 +1010,10 @@ void free_grep_patterns(struct grep_opt *opt) case GREP_PATTERN_BODY: if (p->kws) kwsfree(p->kws); - else if (p->pcre_regexp) - free_pcre_regexp(p); + else if (p->pcre1_regexp) + free_pcre1_regexp(p); + else if (p->pcre2_pattern) + free_pcre2_pattern(p); else regfree(&p->regexp); free(p->pattern); @@ -913,8 +1092,10 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, if (p->fixed) hit = !fixmatch(p, line, eol, match); - else if (p->pcre_regexp) - hit = !pcrematch(p, line, eol, match, eflags); + else if (p->pcre1_regexp) + hit = !pcre1match(p, line, eol, match, eflags); + else if (p->pcre2_pattern) + hit = !pcre2match(p, line, eol, match, eflags); else hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, eflags); @@ -1404,11 +1585,11 @@ static int fill_textconv_grep(struct userdiff_driver *driver, */ df = alloc_filespec(gs->path); switch (gs->type) { - case GREP_SOURCE_SHA1: + case GREP_SOURCE_OID: fill_filespec(df, gs->identifier, 1, 0100644); break; case GREP_SOURCE_FILE: - fill_filespec(df, null_sha1, 0, 0100644); + fill_filespec(df, &null_oid, 0, 0100644); break; default: die("BUG: attempt to textconv something without a path?"); @@ -1748,9 +1929,8 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type, * If the identifier is non-NULL (in the submodule case) it * will be a SHA1 that needs to be copied. */ - case GREP_SOURCE_SHA1: - gs->identifier = xmalloc(20); - hashcpy(gs->identifier, identifier); + case GREP_SOURCE_OID: + gs->identifier = oiddup(identifier); break; case GREP_SOURCE_BUF: gs->identifier = NULL; @@ -1773,7 +1953,7 @@ void grep_source_clear_data(struct grep_source *gs) { switch (gs->type) { case GREP_SOURCE_FILE: - case GREP_SOURCE_SHA1: + case GREP_SOURCE_OID: case GREP_SOURCE_SUBMODULE: free(gs->buf); gs->buf = NULL; @@ -1785,7 +1965,7 @@ void grep_source_clear_data(struct grep_source *gs) } } -static int grep_source_load_sha1(struct grep_source *gs) +static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; @@ -1796,7 +1976,7 @@ static int grep_source_load_sha1(struct grep_source *gs) if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, - sha1_to_hex(gs->identifier)); + oid_to_hex(gs->identifier)); return 0; } @@ -1842,8 +2022,8 @@ static int grep_source_load(struct grep_source *gs) switch (gs->type) { case GREP_SOURCE_FILE: return grep_source_load_file(gs); - case GREP_SOURCE_SHA1: - return grep_source_load_sha1(gs); + case GREP_SOURCE_OID: + return grep_source_load_oid(gs); case GREP_SOURCE_BUF: return gs->buf ? 0 : -1; case GREP_SOURCE_SUBMODULE: |