From 50d92b5f03f3c84d581b27cb8fa3a4b8cfbf2567 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 16 Aug 2021 14:09:53 -0700 Subject: grep: typesafe versions of grep_source_init grep_source_init() can create "struct grep_source" objects and, depending on the value of the type passed, some void-pointer parameters have different meanings. Because one of these types (GREP_SOURCE_OID) will require an additional parameter in a subsequent patch, take the opportunity to increase clarity and type safety by replacing this function with individual functions for each type. Signed-off-by: Jonathan Tan Reviewed-by: Matheus Tavares Signed-off-by: Junio C Hamano --- grep.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index 424a39591b..8a8105c2eb 100644 --- a/grep.c +++ b/grep.c @@ -1825,14 +1825,24 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs) return grep_source_1(opt, gs, 0); } +static void grep_source_init_buf(struct grep_source *gs, char *buf, + unsigned long size) +{ + gs->type = GREP_SOURCE_BUF; + gs->name = NULL; + gs->path = NULL; + gs->buf = buf; + gs->size = size; + gs->driver = NULL; + gs->identifier = NULL; +} + int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) { struct grep_source gs; int r; - grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL); - gs.buf = buf; - gs.size = size; + grep_source_init_buf(&gs, buf, size); r = grep_source(opt, &gs); @@ -1840,28 +1850,28 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) return r; } -void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const char *path, - const void *identifier) +void grep_source_init_file(struct grep_source *gs, const char *name, + const char *path) { - gs->type = type; + gs->type = GREP_SOURCE_FILE; gs->name = xstrdup_or_null(name); gs->path = xstrdup_or_null(path); gs->buf = NULL; gs->size = 0; gs->driver = NULL; + gs->identifier = xstrdup(path); +} - switch (type) { - case GREP_SOURCE_FILE: - gs->identifier = xstrdup(identifier); - break; - case GREP_SOURCE_OID: - gs->identifier = oiddup(identifier); - break; - case GREP_SOURCE_BUF: - gs->identifier = NULL; - break; - } +void grep_source_init_oid(struct grep_source *gs, const char *name, + const char *path, const struct object_id *oid) +{ + gs->type = GREP_SOURCE_OID; + gs->name = xstrdup_or_null(name); + gs->path = xstrdup_or_null(path); + gs->buf = NULL; + gs->size = 0; + gs->driver = NULL; + gs->identifier = oiddup(oid); } void grep_source_clear(struct grep_source *gs) -- cgit v1.2.3 From 0693806bf82fb76347e226d8fc5e69077c0a3df5 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 16 Aug 2021 14:09:56 -0700 Subject: grep: add repository to OID grep sources Record the repository whenever an OID grep source is created, and teach the worker threads to explicitly provide the repository when accessing objects. Signed-off-by: Jonathan Tan Reviewed-by: Matheus Tavares Signed-off-by: Junio C Hamano --- grep.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index 8a8105c2eb..79598f245f 100644 --- a/grep.c +++ b/grep.c @@ -1863,7 +1863,8 @@ void grep_source_init_file(struct grep_source *gs, const char *name, } void grep_source_init_oid(struct grep_source *gs, const char *name, - const char *path, const struct object_id *oid) + const char *path, const struct object_id *oid, + struct repository *repo) { gs->type = GREP_SOURCE_OID; gs->name = xstrdup_or_null(name); @@ -1872,6 +1873,7 @@ void grep_source_init_oid(struct grep_source *gs, const char *name, gs->size = 0; gs->driver = NULL; gs->identifier = oiddup(oid); + gs->repo = repo; } void grep_source_clear(struct grep_source *gs) @@ -1900,7 +1902,8 @@ static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; - gs->buf = read_object_file(gs->identifier, &type, &gs->size); + gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type, + &gs->size); if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, -- cgit v1.2.3 From cc8e26ee8d56dbdb442796a43aa7f30d3684b036 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:46:56 -0400 Subject: grep: stop modifying buffer in strip_timestamp When grepping for headers in commit objects, we receive individual lines (e.g., "author Name 1234 -0000"), and then strip off the timestamp to do our match. We do so by writing a NUL byte over the whitespace separator, and then remembering to restore it later. We had to do it this way when this was added back in a4d7d2c6db (log --author/--committer: really match only with name part, 2008-09-04), because we fed the result directly to regexec(), which expects a NUL-terminated string. But since b7d36ffca0 (regex: use regexec_buf(), 2016-09-21), we have a function which can match part of a buffer. So instead of modifying the string, we can instead just move the "eol" pointer, and the rest of the code will do the right thing. This will let further patches mark more buffers as "const". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index 79598f245f..5b1f2da4d3 100644 --- a/grep.c +++ b/grep.c @@ -922,20 +922,16 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, return hit; } -static int strip_timestamp(char *bol, char **eol_p) +static void strip_timestamp(char *bol, char **eol_p) { char *eol = *eol_p; - int ch; while (bol < --eol) { if (*eol != '>') continue; *eol_p = ++eol; - ch = *eol; - *eol = '\0'; - return ch; + break; } - return 0; } static struct { @@ -952,7 +948,6 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, regmatch_t *pmatch, int eflags) { int hit = 0; - int saved_ch = 0; const char *start = bol; if ((p->token != GREP_PATTERN) && @@ -971,7 +966,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, switch (p->field) { case GREP_HEADER_AUTHOR: case GREP_HEADER_COMMITTER: - saved_ch = strip_timestamp(bol, &eol); + strip_timestamp(bol, &eol); break; default: break; @@ -1021,8 +1016,6 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, goto again; } } - if (p->token == GREP_PATTERN_HEAD && saved_ch) - *eol = saved_ch; if (hit) { pmatch[0].rm_so += bol - start; pmatch[0].rm_eo += bol - start; -- cgit v1.2.3 From 995e525b1729ada354e443f16e1c0fad59df25a8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:48:09 -0400 Subject: grep: stop modifying buffer in show_line() When showing lines via grep (or looking for funcnames), we call show_line() on a multi-line buffer. It finds the end of line and marks it with a NUL. However, we don't need to do so, as the resulting line is only used along with its "eol" marker: - we pass both to next_match(), which takes care to look at only the bytes we specified - we pass the line to output_color() without its matching eol marker. However, we do use the "match" struct we got from next_match() to tell it how many bytes to look at (which can never exceed the string we passed it). So we can stop setting and restoring this NUL marker. That makes the code simpler, and will allow us to take a const buffer in a future patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index 5b1f2da4d3..70af01d1c1 100644 --- a/grep.c +++ b/grep.c @@ -1239,7 +1239,6 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, if (opt->color || opt->only_matching) { regmatch_t match; enum grep_context ctx = GREP_CONTEXT_BODY; - int ch = *eol; int eflags = 0; if (opt->color) { @@ -1254,7 +1253,6 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, else if (sign == '=') line_color = opt->colors[GREP_COLOR_FUNCTION]; } - *eol = '\0'; while (next_match(opt, bol, eol, ctx, &match, eflags)) { if (match.rm_so == match.rm_eo) break; @@ -1272,7 +1270,6 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, rest -= match.rm_eo; eflags = REG_NOTBOL; } - *eol = ch; } if (!opt->only_matching) { output_color(opt, bol, rest, line_color); -- cgit v1.2.3 From f84e79ff4bffbcd9de85adc270f5164a6b024d34 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:48:44 -0400 Subject: grep: stop modifying buffer in grep_source_1() We find the end of each matching line of a buffer, and then temporarily write a NUL to turn it into a regular C string. But we don't need to do so, because the only thing we do in the interim is pass the line and its length (via an "eol" pointer) to match_line(). And that function should only look at the bytes we passed it, whether it has a terminating NUL or not. We can drop this temporary write in order to simplify the code and make it easier to use const buffers in more of grep.c. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index 70af01d1c1..32c4641443 100644 --- a/grep.c +++ b/grep.c @@ -1616,7 +1616,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle bol = gs->buf; left = gs->size; while (left) { - char *eol, ch; + char *eol; int hit; ssize_t cno; ssize_t col = -1, icol = -1; @@ -1637,14 +1637,11 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle && look_ahead(opt, &left, &lno, &bol)) break; eol = end_of_line(bol, &left); - ch = *eol; - *eol = 0; if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol)) ctx = GREP_CONTEXT_BODY; hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits); - *eol = ch; if (collect_hits) goto next_line; -- cgit v1.2.3 From 1a845fbc48f2613c0daab717ee934e066e65723d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:49:49 -0400 Subject: grep: mark "haystack" buffers as const When we're grepping in a buffer, we don't need to modify it. So we can take "const char *" buffers, rather than "char *". This can avoid some awkward casts in our callers, and make our expectations more clear (we will not take ownership of the memory, nor will we ever write to it). These spots don't all necessarily have to be converted at the same time, but some of them are dependent on others, because we pass pointers-to-pointers in a few cases. And none of this should change any behavior, since we're just adding "const" qualifiers (and likewise, the compiler will let us know if we missed any spots). So it's relatively low-risk to just do this all at once. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 61 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 26 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index 32c4641443..9603ac119d 100644 --- a/grep.c +++ b/grep.c @@ -867,7 +867,7 @@ void free_grep_patterns(struct grep_opt *opt) free_pattern_expr(opt->pattern_expression); } -static char *end_of_line(char *cp, unsigned long *left) +static const char *end_of_line(const char *cp, unsigned long *left) { unsigned long l = *left; while (l && *cp != '\n') { @@ -908,7 +908,8 @@ static void show_name(struct grep_opt *opt, const char *name) opt->output(opt, opt->null_following_name ? "\0" : "\n", 1); } -static int patmatch(struct grep_pat *p, char *line, char *eol, +static int patmatch(struct grep_pat *p, + const char *line, const char *eol, regmatch_t *match, int eflags) { int hit; @@ -922,9 +923,9 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, return hit; } -static void strip_timestamp(char *bol, char **eol_p) +static void strip_timestamp(const char *bol, const char **eol_p) { - char *eol = *eol_p; + const char *eol = *eol_p; while (bol < --eol) { if (*eol != '>') @@ -943,7 +944,8 @@ static struct { { "reflog ", 7 }, }; -static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, +static int match_one_pattern(struct grep_pat *p, + const char *bol, const char *eol, enum grep_context ctx, regmatch_t *pmatch, int eflags) { @@ -1023,8 +1025,9 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, return hit; } -static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol, - char *eol, enum grep_context ctx, ssize_t *col, +static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, + const char *bol, const char *eol, + enum grep_context ctx, ssize_t *col, ssize_t *icol, int collect_hits) { int h = 0; @@ -1091,7 +1094,8 @@ static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol, return h; } -static int match_expr(struct grep_opt *opt, char *bol, char *eol, +static int match_expr(struct grep_opt *opt, + const char *bol, const char *eol, enum grep_context ctx, ssize_t *col, ssize_t *icol, int collect_hits) { @@ -1099,7 +1103,8 @@ static int match_expr(struct grep_opt *opt, char *bol, char *eol, return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits); } -static int match_line(struct grep_opt *opt, char *bol, char *eol, +static int match_line(struct grep_opt *opt, + const char *bol, const char *eol, ssize_t *col, ssize_t *icol, enum grep_context ctx, int collect_hits) { @@ -1131,7 +1136,8 @@ static int match_line(struct grep_opt *opt, char *bol, char *eol, return hit; } -static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, +static int match_next_pattern(struct grep_pat *p, + const char *bol, const char *eol, enum grep_context ctx, regmatch_t *pmatch, int eflags) { @@ -1152,7 +1158,8 @@ static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, return 1; } -static int next_match(struct grep_opt *opt, char *bol, char *eol, +static int next_match(struct grep_opt *opt, + const char *bol, const char *eol, enum grep_context ctx, regmatch_t *pmatch, int eflags) { struct grep_pat *p; @@ -1208,7 +1215,8 @@ static void show_line_header(struct grep_opt *opt, const char *name, } } -static void show_line(struct grep_opt *opt, char *bol, char *eol, +static void show_line(struct grep_opt *opt, + const char *bol, const char *eol, const char *name, unsigned lno, ssize_t cno, char sign) { int rest = eol - bol; @@ -1297,7 +1305,8 @@ static inline void grep_attr_unlock(void) pthread_mutex_unlock(&grep_attr_mutex); } -static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol) +static int match_funcname(struct grep_opt *opt, struct grep_source *gs, + const char *bol, const char *eol) { xdemitconf_t *xecfg = opt->priv; if (xecfg && !xecfg->find_func) { @@ -1324,10 +1333,10 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bo } static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, - char *bol, unsigned lno) + const char *bol, unsigned lno) { while (bol > gs->buf) { - char *eol = --bol; + const char *eol = --bol; while (bol > gs->buf && bol[-1] != '\n') bol--; @@ -1346,7 +1355,7 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, static int is_empty_line(const char *bol, const char *eol); static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, - char *bol, char *end, unsigned lno) + const char *bol, const char *end, unsigned lno) { unsigned cur = lno, from = 1, funcname_lno = 0, orig_from; int funcname_needed = !!opt->funcname, comment_needed = 0; @@ -1366,8 +1375,8 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, /* Rewind. */ while (bol > gs->buf && cur > from) { - char *next_bol = bol; - char *eol = --bol; + const char *next_bol = bol; + const char *eol = --bol; while (bol > gs->buf && bol[-1] != '\n') bol--; @@ -1398,7 +1407,7 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, /* Back forward. */ while (cur < lno) { - char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-'; + const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-'; while (*eol != '\n') eol++; @@ -1426,12 +1435,12 @@ static int should_lookahead(struct grep_opt *opt) static int look_ahead(struct grep_opt *opt, unsigned long *left_p, unsigned *lno_p, - char **bol_p) + const char **bol_p) { unsigned lno = *lno_p; - char *bol = *bol_p; + const char *bol = *bol_p; struct grep_pat *p; - char *sp, *last_bol; + const char *sp, *last_bol; regoff_t earliest = -1; for (p = opt->pattern_list; p; p = p->next) { @@ -1533,8 +1542,8 @@ static int is_empty_line(const char *bol, const char *eol) static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits) { - char *bol; - char *peek_bol = NULL; + const char *bol; + const char *peek_bol = NULL; unsigned long left; unsigned lno = 1; unsigned last_hit = 0; @@ -1616,7 +1625,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle bol = gs->buf; left = gs->size; while (left) { - char *eol; + const char *eol; int hit; ssize_t cno; ssize_t col = -1, icol = -1; @@ -1700,7 +1709,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle } if (show_function && (!peek_bol || peek_bol < bol)) { unsigned long peek_left = left; - char *peek_eol = eol; + const char *peek_eol = eol; /* * Trailing empty lines are not interesting. -- cgit v1.2.3 From 1e66871608d1f6f4cd66e899ee33755bbf6deafa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:51:28 -0400 Subject: grep: store grep_source buffer as const Our grep_buffer() function takes a non-const buffer, which is confusing: we don't take ownership of nor write to the buffer. This mostly comes from the fact that the underlying grep_source struct in which we store the buffer uses non-const pointer. The memory pointed to by the struct is sometimes owned by us (for FILE or OID sources), and sometimes not (for BUF sources). Let's store it as const, which lets us err on the side of caution (i.e., the compiler will warn us if any of our code writes to or tries to free it). As a result, we must annotate the one place where we do free it by casting away the constness. But that's a small price to pay for the extra safety and clarity elsewhere (and indeed, it already had a comment explaining why GREP_SOURCE_BUF _didn't_ free it). And then we can mark grep_buffer() as taking a const buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index 9603ac119d..14fe8a0fd2 100644 --- a/grep.c +++ b/grep.c @@ -1821,7 +1821,8 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs) return grep_source_1(opt, gs, 0); } -static void grep_source_init_buf(struct grep_source *gs, char *buf, +static void grep_source_init_buf(struct grep_source *gs, + const char *buf, unsigned long size) { gs->type = GREP_SOURCE_BUF; @@ -1833,7 +1834,7 @@ static void grep_source_init_buf(struct grep_source *gs, char *buf, gs->identifier = NULL; } -int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) +int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size) { struct grep_source gs; int r; @@ -1885,7 +1886,9 @@ void grep_source_clear_data(struct grep_source *gs) switch (gs->type) { case GREP_SOURCE_FILE: case GREP_SOURCE_OID: - FREE_AND_NULL(gs->buf); + /* these types own the buffer */ + free((char *)gs->buf); + gs->buf = NULL; gs->size = 0; break; case GREP_SOURCE_BUF: -- cgit v1.2.3