From 8d33c3af0b2113091ea2c2c94990d0332c9551e7 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 16 Aug 2021 14:09:52 -0700 Subject: grep: use submodule-ODB-as-alternate lazy-addition In the parent commit, Git was taught to add submodule ODBs as alternates lazily, but grep does not use this because it computes the path to add directly, not going through add_submodule_odb(). Add an equivalent to add_submodule_odb() that takes the exact ODB path and teach grep to use it. Signed-off-by: Jonathan Tan Reviewed-by: Emily Shaffer Reviewed-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index 7d2f8e5adb..87bcb934a2 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -450,7 +450,7 @@ static int grep_submodule(struct grep_opt *opt, * store is no longer global and instead is a member of the repository * object. */ - add_to_alternates_memory(subrepo.objects->odb->path); + add_submodule_odb_by_path(subrepo.objects->odb->path); obj_read_unlock(); memcpy(&subopt, opt, sizeof(subopt)); -- cgit v1.2.3 From 50d92b5f03f3c84d581b27cb8fa3a4b8cfbf2567 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 16 Aug 2021 14:09:53 -0700 Subject: grep: typesafe versions of grep_source_init grep_source_init() can create "struct grep_source" objects and, depending on the value of the type passed, some void-pointer parameters have different meanings. Because one of these types (GREP_SOURCE_OID) will require an additional parameter in a subsequent patch, take the opportunity to increase clarity and type safety by replacing this function with individual functions for each type. Signed-off-by: Jonathan Tan Reviewed-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index 87bcb934a2..e454335e9d 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -333,7 +333,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, struct grep_source gs; grep_source_name(opt, filename, tree_name_len, &pathbuf); - grep_source_init(&gs, GREP_SOURCE_OID, pathbuf.buf, path, oid); + grep_source_init_oid(&gs, pathbuf.buf, path, oid); strbuf_release(&pathbuf); if (num_threads > 1) { @@ -359,7 +359,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) struct grep_source gs; grep_source_name(opt, filename, 0, &buf); - grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename); + grep_source_init_file(&gs, buf.buf, filename); strbuf_release(&buf); if (num_threads > 1) { -- cgit v1.2.3 From 78ca584f1c4a720988f6066693b4d2ccde920813 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 16 Aug 2021 14:09:54 -0700 Subject: grep: read submodule entry with explicit repo Replace an existing parse_object_or_die() call (which implicitly works on the_repository) with a function call that allows a repository to be passed in. There is no such direct equivalent to parse_object_or_die(), but we only need the type of the object, so replace with oid_object_info(). Signed-off-by: Jonathan Tan Reviewed-by: Emily Shaffer Reviewed-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/grep.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index e454335e9d..9e61c7c993 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -457,27 +457,27 @@ static int grep_submodule(struct grep_opt *opt, subopt.repo = &subrepo; if (oid) { - struct object *object; + enum object_type object_type; struct tree_desc tree; void *data; unsigned long size; struct strbuf base = STRBUF_INIT; obj_read_lock(); - object = parse_object_or_die(oid, NULL); + object_type = oid_object_info(&subrepo, oid, NULL); obj_read_unlock(); data = read_object_with_reference(&subrepo, - &object->oid, tree_type, + oid, tree_type, &size, NULL); if (!data) - die(_("unable to read tree (%s)"), oid_to_hex(&object->oid)); + die(_("unable to read tree (%s)"), oid_to_hex(oid)); strbuf_addstr(&base, filename); strbuf_addch(&base, '/'); init_tree_desc(&tree, data, size); hit = grep_tree(&subopt, pathspec, &tree, &base, base.len, - object->type == OBJ_COMMIT); + object_type == OBJ_COMMIT); strbuf_release(&base); free(data); } else { -- cgit v1.2.3 From dd45471a3717bcd6561e405371b81928214ad1b5 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 16 Aug 2021 14:09:55 -0700 Subject: grep: allocate subrepos on heap Currently, struct repository objects corresponding to submodules are allocated on the stack in grep_submodule(). This currently works because they will not be used once grep_submodule() exits, but a subsequent patch will require these structs to be accessible for longer (perhaps even in another thread). Allocate them on the heap and clear them only at the very end. Signed-off-by: Jonathan Tan Reviewed-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/grep.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index 9e61c7c993..fa7fd08150 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -65,6 +65,9 @@ static int todo_done; /* Has all work items been added? */ static int all_work_added; +static struct repository **repos_to_free; +static size_t repos_to_free_nr, repos_to_free_alloc; + /* This lock protects all the variables above. */ static pthread_mutex_t grep_mutex; @@ -168,6 +171,19 @@ static void work_done(struct work_item *w) grep_unlock(); } +static void free_repos(void) +{ + int i; + + for (i = 0; i < repos_to_free_nr; i++) { + repo_clear(repos_to_free[i]); + free(repos_to_free[i]); + } + FREE_AND_NULL(repos_to_free); + repos_to_free_nr = 0; + repos_to_free_alloc = 0; +} + static void *run(void *arg) { int hit = 0; @@ -415,19 +431,24 @@ static int grep_submodule(struct grep_opt *opt, const struct object_id *oid, const char *filename, const char *path, int cached) { - struct repository subrepo; + struct repository *subrepo; struct repository *superproject = opt->repo; const struct submodule *sub; struct grep_opt subopt; - int hit; + int hit = 0; sub = submodule_from_path(superproject, null_oid(), path); if (!is_submodule_active(superproject, path)) return 0; - if (repo_submodule_init(&subrepo, superproject, sub)) + subrepo = xmalloc(sizeof(*subrepo)); + if (repo_submodule_init(subrepo, superproject, sub)) { + free(subrepo); return 0; + } + ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc); + repos_to_free[repos_to_free_nr++] = subrepo; /* * NEEDSWORK: repo_read_gitmodules() might call @@ -438,7 +459,7 @@ static int grep_submodule(struct grep_opt *opt, * subrepo's odbs to the in-memory alternates list. */ obj_read_lock(); - repo_read_gitmodules(&subrepo, 0); + repo_read_gitmodules(subrepo, 0); /* * NEEDSWORK: This adds the submodule's object directory to the list of @@ -450,11 +471,11 @@ static int grep_submodule(struct grep_opt *opt, * store is no longer global and instead is a member of the repository * object. */ - add_submodule_odb_by_path(subrepo.objects->odb->path); + add_submodule_odb_by_path(subrepo->objects->odb->path); obj_read_unlock(); memcpy(&subopt, opt, sizeof(subopt)); - subopt.repo = &subrepo; + subopt.repo = subrepo; if (oid) { enum object_type object_type; @@ -464,9 +485,9 @@ static int grep_submodule(struct grep_opt *opt, struct strbuf base = STRBUF_INIT; obj_read_lock(); - object_type = oid_object_info(&subrepo, oid, NULL); + object_type = oid_object_info(subrepo, oid, NULL); obj_read_unlock(); - data = read_object_with_reference(&subrepo, + data = read_object_with_reference(subrepo, oid, tree_type, &size, NULL); if (!data) @@ -484,7 +505,6 @@ static int grep_submodule(struct grep_opt *opt, hit = grep_cache(&subopt, pathspec, cached); } - repo_clear(&subrepo); return hit; } @@ -1182,5 +1202,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) run_pager(&opt, prefix); clear_pathspec(&pathspec); free_grep_patterns(&opt); + free_repos(); return !hit; } -- cgit v1.2.3 From 0693806bf82fb76347e226d8fc5e69077c0a3df5 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 16 Aug 2021 14:09:56 -0700 Subject: grep: add repository to OID grep sources Record the repository whenever an OID grep source is created, and teach the worker threads to explicitly provide the repository when accessing objects. Signed-off-by: Jonathan Tan Reviewed-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/grep.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index fa7fd08150..51278b01fa 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -349,7 +349,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, struct grep_source gs; grep_source_name(opt, filename, tree_name_len, &pathbuf); - grep_source_init_oid(&gs, pathbuf.buf, path, oid); + grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo); strbuf_release(&pathbuf); if (num_threads > 1) { @@ -462,14 +462,11 @@ static int grep_submodule(struct grep_opt *opt, repo_read_gitmodules(subrepo, 0); /* - * NEEDSWORK: This adds the submodule's object directory to the list of - * alternates for the single in-memory object store. This has some bad - * consequences for memory (processed objects will never be freed) and - * performance (this increases the number of pack files git has to pay - * attention to, to the sum of the number of pack files in all the - * repositories processed so far). This can be removed once the object - * store is no longer global and instead is a member of the repository - * object. + * All code paths tested by test code no longer need submodule ODBs to + * be added as alternates, but add it to the list just in case. + * Submodule ODBs added through add_submodule_odb_by_path() will be + * lazily registered as alternates when needed (and except in an + * unexpected code interaction, it won't be needed). */ add_submodule_odb_by_path(subrepo->objects->odb->path); obj_read_unlock(); -- cgit v1.2.3