diff options
Diffstat (limited to 'grep.c')
-rw-r--r-- | grep.c | 200 |
1 files changed, 150 insertions, 50 deletions
@@ -28,9 +28,27 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, p->next = NULL; } +static int is_fixed(const char *s) +{ + while (*s && !is_regex_special(*s)) + s++; + return !*s; +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int err = regcomp(&p->regexp, p->pattern, opt->regflags); + int err; + + p->word_regexp = opt->word_regexp; + + if (opt->fixed || is_fixed(p->pattern)) + p->fixed = 1; + if (opt->regflags & REG_ICASE) + p->fixed = 0; + if (p->fixed) + return; + + err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; char where[1024]; @@ -161,8 +179,7 @@ void compile_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - if (!opt->fixed) - compile_regexp(p, opt); + compile_regexp(p, opt); break; default: opt->extended = 1; @@ -177,7 +194,8 @@ void compile_grep_patterns(struct grep_opt *opt) * A classic recursive descent parser would do. */ p = opt->pattern_list; - opt->pattern_expression = compile_pattern_expr(&p); + if (p) + opt->pattern_expression = compile_pattern_expr(&p); if (p) die("incomplete pattern expression: %s", p->pattern); } @@ -238,14 +256,9 @@ static int word_char(char ch) return isalnum(ch) || ch == '_'; } -static void show_line(struct grep_opt *opt, const char *bol, const char *eol, - const char *name, unsigned lno, char sign) +static void show_name(struct grep_opt *opt, const char *name) { - if (opt->pathname) - printf("%s%c", name, sign); - if (opt->linenum) - printf("%d%c", lno, sign); - printf("%.*s\n", (int)(eol-bol), bol); + printf("%s%c", name, opt->null_following_name ? '\0' : '\n'); } static int fixmatch(const char *pattern, char *line, regmatch_t *match) @@ -286,12 +299,13 @@ static struct { { "committer ", 10 }, }; -static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx) +static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, + enum grep_context ctx, + regmatch_t *pmatch, int eflags) { int hit = 0; - int at_true_bol = 1; int saved_ch = 0; - regmatch_t pmatch[10]; + const char *start = bol; if ((p->token != GREP_PATTERN) && ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD))) @@ -310,18 +324,14 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol } again: - if (!opt->fixed) { - regex_t *exp = &p->regexp; - hit = !regexec(exp, bol, ARRAY_SIZE(pmatch), - pmatch, 0); - } - else { + if (p->fixed) hit = !fixmatch(p->pattern, bol, pmatch); - } + else + hit = !regexec(&p->regexp, bol, 1, pmatch, eflags); - if (hit && opt->word_regexp) { + if (hit && p->word_regexp) { if ((pmatch[0].rm_so < 0) || - (eol - bol) <= pmatch[0].rm_so || + (eol - bol) < pmatch[0].rm_so || (pmatch[0].rm_eo < 0) || (eol - bol) < pmatch[0].rm_eo) die("regexp returned nonsense"); @@ -332,7 +342,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol * either end of the line, or at word boundary * (i.e. the next char must not be a word char). */ - if ( ((pmatch[0].rm_so == 0 && at_true_bol) || + if ( ((pmatch[0].rm_so == 0) || !word_char(bol[pmatch[0].rm_so-1])) && ((pmatch[0].rm_eo == (eol-bol)) || !word_char(bol[pmatch[0].rm_eo])) ) @@ -340,59 +350,66 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol else hit = 0; + /* Words consist of at least one character. */ + if (pmatch->rm_so == pmatch->rm_eo) + hit = 0; + if (!hit && pmatch[0].rm_so + bol + 1 < eol) { /* There could be more than one match on the * line, and the first match might not be * strict word match. But later ones could be! + * Forward to the next possible start, i.e. the + * next position following a non-word char. */ bol = pmatch[0].rm_so + bol + 1; - at_true_bol = 0; - goto again; + while (word_char(bol[-1]) && bol < eol) + bol++; + eflags |= REG_NOTBOL; + if (bol < eol) + goto again; } } if (p->token == GREP_PATTERN_HEAD && saved_ch) *eol = saved_ch; + if (hit) { + pmatch[0].rm_so += bol - start; + pmatch[0].rm_eo += bol - start; + } return hit; } -static int match_expr_eval(struct grep_opt *o, - struct grep_expr *x, - char *bol, char *eol, - enum grep_context ctx, - int collect_hits) +static int match_expr_eval(struct grep_expr *x, char *bol, char *eol, + enum grep_context ctx, int collect_hits) { int h = 0; + regmatch_t match; if (!x) die("Not a valid grep expression"); switch (x->node) { case GREP_NODE_ATOM: - h = match_one_pattern(o, x->u.atom, bol, eol, ctx); + h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0); break; case GREP_NODE_NOT: - h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0); + h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0); break; case GREP_NODE_AND: - if (!collect_hits) - return (match_expr_eval(o, x->u.binary.left, - bol, eol, ctx, 0) && - match_expr_eval(o, x->u.binary.right, - bol, eol, ctx, 0)); - h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0); - h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0); + if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0)) + return 0; + h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0); break; case GREP_NODE_OR: if (!collect_hits) - return (match_expr_eval(o, x->u.binary.left, + return (match_expr_eval(x->u.binary.left, bol, eol, ctx, 0) || - match_expr_eval(o, x->u.binary.right, + match_expr_eval(x->u.binary.right, bol, eol, ctx, 0)); - h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0); + h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0); x->u.binary.left->hit |= h; - h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1); + h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1); break; default: - die("Unexpected node type (internal error) %d\n", x->node); + die("Unexpected node type (internal error) %d", x->node); } if (collect_hits) x->hit |= h; @@ -403,24 +420,106 @@ static int match_expr(struct grep_opt *opt, char *bol, char *eol, enum grep_context ctx, int collect_hits) { struct grep_expr *x = opt->pattern_expression; - return match_expr_eval(opt, x, bol, eol, ctx, collect_hits); + return match_expr_eval(x, bol, eol, ctx, collect_hits); } static int match_line(struct grep_opt *opt, char *bol, char *eol, enum grep_context ctx, int collect_hits) { struct grep_pat *p; + regmatch_t match; + if (opt->extended) return match_expr(opt, bol, eol, ctx, collect_hits); /* we do not call with collect_hits without being extended */ for (p = opt->pattern_list; p; p = p->next) { - if (match_one_pattern(opt, p, bol, eol, ctx)) + if (match_one_pattern(p, bol, eol, ctx, &match, 0)) return 1; } return 0; } +static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, + enum grep_context ctx, + regmatch_t *pmatch, int eflags) +{ + regmatch_t match; + + if (!match_one_pattern(p, bol, eol, ctx, &match, eflags)) + return 0; + if (match.rm_so < 0 || match.rm_eo < 0) + return 0; + if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) { + if (match.rm_so > pmatch->rm_so) + return 1; + if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo) + return 1; + } + pmatch->rm_so = match.rm_so; + pmatch->rm_eo = match.rm_eo; + return 1; +} + +static int next_match(struct grep_opt *opt, char *bol, char *eol, + enum grep_context ctx, regmatch_t *pmatch, int eflags) +{ + struct grep_pat *p; + int hit = 0; + + pmatch->rm_so = pmatch->rm_eo = -1; + if (bol < eol) { + for (p = opt->pattern_list; p; p = p->next) { + switch (p->token) { + case GREP_PATTERN: /* atom */ + case GREP_PATTERN_HEAD: + case GREP_PATTERN_BODY: + hit |= match_next_pattern(p, bol, eol, ctx, + pmatch, eflags); + break; + default: + break; + } + } + } + return hit; +} + +static void show_line(struct grep_opt *opt, char *bol, char *eol, + const char *name, unsigned lno, char sign) +{ + int rest = eol - bol; + + if (opt->null_following_name) + sign = '\0'; + if (opt->pathname) + printf("%s%c", name, sign); + if (opt->linenum) + printf("%d%c", lno, sign); + if (opt->color) { + regmatch_t match; + enum grep_context ctx = GREP_CONTEXT_BODY; + int ch = *eol; + int eflags = 0; + + *eol = '\0'; + while (next_match(opt, bol, eol, ctx, &match, eflags)) { + if (match.rm_so == match.rm_eo) + break; + printf("%.*s%s%.*s%s", + (int)match.rm_so, bol, + opt->color_match, + (int)(match.rm_eo - match.rm_so), bol + match.rm_so, + GIT_COLOR_RESET); + bol += match.rm_eo; + rest -= match.rm_eo; + eflags = REG_NOTBOL; + } + *eol = ch; + } + printf("%.*s\n", rest, bol); +} + static int grep_buffer_1(struct grep_opt *opt, const char *name, char *buf, unsigned long size, int collect_hits) { @@ -493,7 +592,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, return 1; } if (opt->name_only) { - printf("%s\n", name); + show_name(opt, name); return 1; } /* Hit at this line. If we haven't shown the @@ -559,7 +658,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, return 0; if (opt->unmatch_name_only) { /* We did not see any hit, so we want to show this */ - printf("%s\n", name); + show_name(opt, name); return 1; } @@ -569,7 +668,8 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, * make it another option? For now suppress them. */ if (opt->count && count) - printf("%s:%u\n", name, count); + printf("%s%c%u\n", name, + opt->null_following_name ? '\0' : ':', count); return !!last_hit; } |