diff options
-rw-r--r-- | Makefile | 36 | ||||
-rw-r--r-- | configure.ac | 77 | ||||
-rw-r--r-- | grep.c | 145 | ||||
-rw-r--r-- | grep.h | 17 | ||||
-rw-r--r-- | t/test-lib.sh | 2 |
5 files changed, 256 insertions, 21 deletions
@@ -29,6 +29,11 @@ all:: # Perl-compatible regular expressions instead of standard or extended # POSIX regular expressions. # +# Currently USE_LIBPCRE is a synonym for USE_LIBPCRE1, define +# USE_LIBPCRE2 instead if you'd like to use version 2 of the PCRE +# library. The USE_LIBPCRE flag will likely be changed to mean v2 by +# default in future releases. +# # When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1 # library is compiled without --enable-jit. We will auto-detect # whether the version of the PCRE v1 library in use has JIT support at @@ -37,8 +42,10 @@ all:: # you have link-time errors about a missing `pcre_jit_exec` define # this, or recompile PCRE v1 with --enable-jit. # -# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in -# /foo/bar/include and /foo/bar/lib directories. +# Define LIBPCREDIR=/foo/bar if your PCRE header and library files are +# in /foo/bar/include and /foo/bar/lib directories. Which version of +# PCRE this points to determined by the USE_LIBPCRE1 and USE_LIBPCRE2 +# variables. # # Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header. # @@ -1095,12 +1102,14 @@ ifdef NO_LIBGEN_H COMPAT_OBJS += compat/basename.o endif -ifdef USE_LIBPCRE - BASIC_CFLAGS += -DUSE_LIBPCRE1 - ifdef LIBPCREDIR - BASIC_CFLAGS += -I$(LIBPCREDIR)/include - EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) +USE_LIBPCRE1 ?= $(USE_LIBPCRE) + +ifneq (,$(USE_LIBPCRE1)) + ifdef USE_LIBPCRE2 +$(error Only set USE_LIBPCRE1 (or its alias USE_LIBPCRE) or USE_LIBPCRE2, not both!) endif + + BASIC_CFLAGS += -DUSE_LIBPCRE1 EXTLIBS += -lpcre ifdef NO_LIBPCRE1_JIT @@ -1108,6 +1117,16 @@ ifdef NO_LIBPCRE1_JIT endif endif +ifdef USE_LIBPCRE2 + BASIC_CFLAGS += -DUSE_LIBPCRE2 + EXTLIBS += -lpcre2-8 +endif + +ifdef LIBPCREDIR + BASIC_CFLAGS += -I$(LIBPCREDIR)/include + EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) +endif + ifdef HAVE_ALLOCA_H BASIC_CFLAGS += -DHAVE_ALLOCA_H endif @@ -2252,7 +2271,8 @@ GIT-BUILD-OPTIONS: FORCE @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@+ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+ @echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+ - @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+ + @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE1)))'\' >>$@+ + @echo USE_LIBPCRE2=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE2)))'\' >>$@+ @echo NO_LIBPCRE1_JIT=\''$(subst ','\'',$(subst ','\'',$(NO_LIBPCRE1_JIT)))'\' >>$@+ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+ diff --git a/configure.ac b/configure.ac index deeb968daa..11d083fbe0 100644 --- a/configure.ac +++ b/configure.ac @@ -255,21 +255,61 @@ GIT_PARSE_WITH([openssl])) # Perl-compatible regular expressions instead of standard or extended # POSIX regular expressions. # -# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in +# Currently USE_LIBPCRE is a synonym for USE_LIBPCRE1, define +# USE_LIBPCRE2 instead if you'd like to use version 2 of the PCRE +# library. The USE_LIBPCRE flag will likely be changed to mean v2 by +# default in future releases. +# +# Define LIBPCREDIR=/foo/bar if your PCRE header and library files are in # /foo/bar/include and /foo/bar/lib directories. # AC_ARG_WITH(libpcre, -AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)]) +AS_HELP_STRING([--with-libpcre],[synonym for --with-libpcre1]), + if test "$withval" = "no"; then + USE_LIBPCRE1= + elif test "$withval" = "yes"; then + USE_LIBPCRE1=YesPlease + else + USE_LIBPCRE1=YesPlease + LIBPCREDIR=$withval + AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR]) + dnl USE_LIBPCRE1 can still be modified below, so don't substitute + dnl it yet. + GIT_CONF_SUBST([LIBPCREDIR]) + fi) + +AC_ARG_WITH(libpcre1, +AS_HELP_STRING([--with-libpcre1],[support Perl-compatible regexes via libpcre1 (default is NO)]) +AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), + if test "$withval" = "no"; then + USE_LIBPCRE1= + elif test "$withval" = "yes"; then + USE_LIBPCRE1=YesPlease + else + USE_LIBPCRE1=YesPlease + LIBPCREDIR=$withval + AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR]) + dnl USE_LIBPCRE1 can still be modified below, so don't substitute + dnl it yet. + GIT_CONF_SUBST([LIBPCREDIR]) + fi) + +AC_ARG_WITH(libpcre2, +AS_HELP_STRING([--with-libpcre2],[support Perl-compatible regexes via libpcre2 (default is NO)]) AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]), + if test -n "$USE_LIBPCRE1"; then + AC_MSG_ERROR([Only supply one of --with-libpcre1 or --with-libpcre2!]) + fi + if test "$withval" = "no"; then - USE_LIBPCRE= + USE_LIBPCRE2= elif test "$withval" = "yes"; then - USE_LIBPCRE=YesPlease + USE_LIBPCRE2=YesPlease else - USE_LIBPCRE=YesPlease + USE_LIBPCRE2=YesPlease LIBPCREDIR=$withval AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR]) - dnl USE_LIBPCRE can still be modified below, so don't substitute + dnl USE_LIBPCRE2 can still be modified below, so don't substitute dnl it yet. GIT_CONF_SUBST([LIBPCREDIR]) fi) @@ -501,13 +541,11 @@ GIT_CONF_SUBST([NEEDS_SSL_WITH_CRYPTO]) GIT_CONF_SUBST([NO_OPENSSL]) # -# Define USE_LIBPCRE if you have and want to use libpcre. Various -# commands such as log and grep offer runtime options to use -# Perl-compatible regular expressions instead of standard or extended -# POSIX regular expressions. +# Handle the USE_LIBPCRE1 and USE_LIBPCRE2 options potentially set +# above. # -if test -n "$USE_LIBPCRE"; then +if test -n "$USE_LIBPCRE1"; then GIT_STASH_FLAGS($LIBPCREDIR) @@ -517,7 +555,22 @@ AC_CHECK_LIB([pcre], [pcre_version], GIT_UNSTASH_FLAGS($LIBPCREDIR) -GIT_CONF_SUBST([USE_LIBPCRE]) +GIT_CONF_SUBST([USE_LIBPCRE1]) + +fi + + +if test -n "$USE_LIBPCRE2"; then + +GIT_STASH_FLAGS($LIBPCREDIR) + +AC_CHECK_LIB([pcre2-8], [pcre2_config_8], +[USE_LIBPCRE2=YesPlease], +[USE_LIBPCRE2=]) + +GIT_UNSTASH_FLAGS($LIBPCREDIR) + +GIT_CONF_SUBST([USE_LIBPCRE2]) fi @@ -179,22 +179,37 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st case GREP_PATTERN_TYPE_BRE: opt->fixed = 0; opt->pcre1 = 0; + opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_ERE: opt->fixed = 0; opt->pcre1 = 0; + opt->pcre2 = 0; opt->regflags |= REG_EXTENDED; break; case GREP_PATTERN_TYPE_FIXED: opt->fixed = 1; opt->pcre1 = 0; + opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_PCRE: opt->fixed = 0; +#ifdef USE_LIBPCRE2 + opt->pcre1 = 0; + opt->pcre2 = 1; +#else + /* + * It's important that pcre1 always be assigned to + * even when there's no USE_LIBPCRE* defined. We still + * call the PCRE stub function, it just dies with + * "cannot use Perl-compatible regexes[...]". + */ opt->pcre1 = 1; + opt->pcre2 = 0; +#endif break; } } @@ -446,6 +461,127 @@ static void free_pcre1_regexp(struct grep_pat *p) } #endif /* !USE_LIBPCRE1 */ +#ifdef USE_LIBPCRE2 +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) +{ + int error; + PCRE2_UCHAR errbuf[256]; + PCRE2_SIZE erroffset; + int options = PCRE2_MULTILINE; + const uint8_t *character_tables = NULL; + int jitret; + + assert(opt->pcre2); + + p->pcre2_compile_context = NULL; + + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) { + character_tables = pcre2_maketables(NULL); + p->pcre2_compile_context = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(p->pcre2_compile_context, character_tables); + } + options |= PCRE2_CASELESS; + } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE2_UTF; + + p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, + p->patternlen, options, &error, &erroffset, + p->pcre2_compile_context); + + if (p->pcre2_pattern) { + p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL); + if (!p->pcre2_match_data) + die("Couldn't allocate PCRE2 match data"); + } else { + pcre2_get_error_message(error, errbuf, sizeof(errbuf)); + compile_regexp_failed(p, (const char *)&errbuf); + } + + pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); + if (p->pcre2_jit_on == 1) { + jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); + if (jitret) + die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret); + p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL); + if (!p->pcre2_jit_stack) + die("Couldn't allocate PCRE2 JIT stack"); + p->pcre2_match_context = pcre2_match_context_create(NULL); + if (!p->pcre2_jit_stack) + die("Couldn't allocate PCRE2 match context"); + pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); + } else if (p->pcre2_jit_on != 0) { + die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d", + p->pcre1_jit_on); + } +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + int ret, flags = 0; + PCRE2_SIZE *ovector; + PCRE2_UCHAR errbuf[256]; + + if (eflags & REG_NOTBOL) + flags |= PCRE2_NOTBOL; + + if (p->pcre2_jit_on) + ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + else + ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line, + eol - line, 0, flags, p->pcre2_match_data, + NULL); + + if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) { + pcre2_get_error_message(ret, errbuf, sizeof(errbuf)); + die("%s failed with error code %d: %s", + (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret, + errbuf); + } + if (ret > 0) { + ovector = pcre2_get_ovector_pointer(p->pcre2_match_data); + ret = 0; + match->rm_so = (int)ovector[0]; + match->rm_eo = (int)ovector[1]; + } + + return ret; +} + +static void free_pcre2_pattern(struct grep_pat *p) +{ + pcre2_compile_context_free(p->pcre2_compile_context); + pcre2_code_free(p->pcre2_pattern); + pcre2_match_data_free(p->pcre2_match_data); + pcre2_jit_stack_free(p->pcre2_jit_stack); + pcre2_match_context_free(p->pcre2_match_context); +} +#else /* !USE_LIBPCRE2 */ +static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) +{ + /* + * Unreachable until USE_LIBPCRE2 becomes synonymous with + * USE_LIBPCRE. See the sibling comment in + * grep_set_pattern_type_option(). + */ + die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); +} + +static int pcre2match(struct grep_pat *p, const char *line, const char *eol, + regmatch_t *match, int eflags) +{ + return 1; +} + +static void free_pcre2_pattern(struct grep_pat *p) +{ +} +#endif /* !USE_LIBPCRE2 */ + static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { struct strbuf sb = STRBUF_INIT; @@ -511,6 +647,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } + if (opt->pcre2) { + compile_pcre2_pattern(p, opt); + return; + } + if (opt->pcre1) { compile_pcre1_regexp(p, opt); return; @@ -870,6 +1011,8 @@ void free_grep_patterns(struct grep_opt *opt) kwsfree(p->kws); else if (p->pcre1_regexp) free_pcre1_regexp(p); + else if (p->pcre2_pattern) + free_pcre2_pattern(p); else regfree(&p->regexp); free(p->pattern); @@ -950,6 +1093,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, hit = !fixmatch(p, line, eol, match); else if (p->pcre1_regexp) hit = !pcre1match(p, line, eol, match, eflags); + else if (p->pcre2_pattern) + hit = !pcre2match(p, line, eol, match, eflags); else hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, eflags); @@ -21,6 +21,16 @@ typedef int pcre; typedef int pcre_extra; typedef int pcre_jit_stack; #endif +#ifdef USE_LIBPCRE2 +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> +#else +typedef int pcre2_code; +typedef int pcre2_match_data; +typedef int pcre2_compile_context; +typedef int pcre2_match_context; +typedef int pcre2_jit_stack; +#endif #include "kwset.h" #include "thread-utils.h" #include "userdiff.h" @@ -65,6 +75,12 @@ struct grep_pat { pcre_jit_stack *pcre1_jit_stack; const unsigned char *pcre1_tables; int pcre1_jit_on; + pcre2_code *pcre2_pattern; + pcre2_match_data *pcre2_match_data; + pcre2_compile_context *pcre2_compile_context; + pcre2_match_context *pcre2_match_context; + pcre2_jit_stack *pcre2_jit_stack; + uint32_t pcre2_jit_on; kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; @@ -128,6 +144,7 @@ struct grep_opt { int extended; int use_reflog_filter; int pcre1; + int pcre2; int relative; int pathname; int null_following_name; diff --git a/t/test-lib.sh b/t/test-lib.sh index ab92c0ebaa..44d4679384 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1011,7 +1011,7 @@ esac test -z "$NO_PERL" && test_set_prereq PERL test -z "$NO_PTHREADS" && test_set_prereq PTHREADS test -z "$NO_PYTHON" && test_set_prereq PYTHON -test -n "$USE_LIBPCRE1" && test_set_prereq PCRE +test -n "$USE_LIBPCRE1$USE_LIBPCRE2" && test_set_prereq PCRE test -z "$NO_GETTEXT" && test_set_prereq GETTEXT # Can we rely on git's output in the C locale? |