From 15beaaa3d1f6b555900446deb5e376b4f806d734 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 5 Nov 2019 17:07:23 +0000 Subject: Fix spelling errors in code comments Reported-by: Jens Schleusener Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- unpack-trees.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index 33ea7810d8..28cbd19570 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -275,9 +275,9 @@ static int check_submodule_move_head(const struct cache_entry *ce, } /* - * Preform the loading of the repository's gitmodules file. This function is + * Perform the loading of the repository's gitmodules file. This function is * used by 'check_update()' to perform loading of the gitmodules file in two - * differnt situations: + * different situations: * (1) before removing entries from the working tree if the gitmodules file has * been marked for removal. This situation is specified by 'state' == NULL. * (2) before checking out entries to the working tree if the gitmodules file -- cgit v1.2.3 From 679f2f9fdd2173d87251aee357dd0e46ce977f42 Mon Sep 17 00:00:00 2001 From: Utsav Shah Date: Wed, 20 Nov 2019 08:32:17 +0000 Subject: unpack-trees: skip stat on fsmonitor-valid files The index might be aware that a file hasn't modified via fsmonitor, but unpack-trees did not pay attention to it and checked via ie_match_stat which can be inefficient on certain filesystems. This significantly slows down commands that run oneway_merge, like checkout and reset --hard. This patch makes oneway_merge check whether a file is considered unchanged through fsmonitor and skips ie_match_stat on it. unpack-trees also now correctly copies over fsmonitor validity state from the source index. Finally, for correctness, we force a refresh of fsmonitor state in tweak_fsmonitor. After this change, commands like stash (that use reset --hard internally) go from 8s or more to ~2s on a 250k file repository on a mac. Helped-by: Junio C Hamano Helped-by: Kevin Willford Signed-off-by: Utsav Shah Signed-off-by: Junio C Hamano --- unpack-trees.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index 62276d4fef..35313d1ac4 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1494,6 +1494,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->merge_size = len; mark_all_ce_unused(o->src_index); + if (o->src_index->fsmonitor_last_update) + o->result.fsmonitor_last_update = o->src_index->fsmonitor_last_update; + /* * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries */ @@ -2374,7 +2377,8 @@ int oneway_merge(const struct cache_entry * const *src, if (old && same(old, a)) { int update = 0; - if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old)) { + if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old) && + !(old->ce_flags & CE_FSMONITOR_VALID)) { struct stat st; if (lstat(old->name, &st) || ie_match_stat(o->src_index, old, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE)) -- cgit v1.2.3 From e6152e35ff287ab58e2c17065f02cb1be9f4a0aa Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Nov 2019 22:04:39 +0000 Subject: trace2: add region in clear_ce_flags When Git updates the working directory with the sparse-checkout feature enabled, the unpack_trees() method calls clear_ce_flags() to update the skip-wortree bits on the cache entries. This check can be expensive, depending on the patterns used. Add trace2 regions around the method, including some flag information, so we can get granular performance data during experiments. This data will be used to measure improvements to the pattern-matching algorithms for sparse-checkout. Signed-off-by: Jeff Hostetler Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- unpack-trees.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index 33ea7810d8..01a05ff66d 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1407,15 +1407,23 @@ static int clear_ce_flags(struct index_state *istate, struct pattern_list *pl) { static struct strbuf prefix = STRBUF_INIT; + char label[100]; + int rval; strbuf_reset(&prefix); - return clear_ce_flags_1(istate, + xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)", + (unsigned long)select_mask, (unsigned long)clear_mask); + trace2_region_enter("unpack_trees", label, the_repository); + rval = clear_ce_flags_1(istate, istate->cache, istate->cache_nr, &prefix, select_mask, clear_mask, pl, 0); + trace2_region_leave("unpack_trees", label, the_repository); + + return rval; } /* -- cgit v1.2.3 From 96cc8ab5318cd57c8bc203b8f064b35883b2386f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:41 +0000 Subject: sparse-checkout: use hashmaps for cone patterns The parent and recursive patterns allowed by the "cone mode" option in sparse-checkout are restrictive enough that we can avoid using the regex parsing. Everything is based on prefix matches, so we can use hashsets to store the prefixes from the sparse-checkout file. When checking a path, we can strip path entries from the path and check the hashset for an exact match. As a test, I created a cone-mode sparse-checkout file for the Linux repository that actually includes every file. This was constructed by taking every folder in the Linux repo and creating the pattern pairs here: /$folder/ !/$folder/*/ This resulted in a sparse-checkout file sith 8,296 patterns. Running 'git read-tree -mu HEAD' on this file had the following performance: core.sparseCheckout=false: 0.21 s (0.00 s) core.sparseCheckout=true: 3.75 s (3.50 s) core.sparseCheckoutCone=true: 0.23 s (0.01 s) The times in parentheses above correspond to the time spent in the first clear_ce_flags() call, according to the trace2 performance traces. While this example is contrived, it demonstrates how these patterns can slow the sparse-checkout feature. Helped-by: Eric Wong Helped-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- unpack-trees.c | 1 + 1 file changed, 1 insertion(+) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index 01a05ff66d..a90d71845d 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1482,6 +1482,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->skip_sparse_checkout = 1; if (!o->skip_sparse_checkout) { char *sparse = git_pathdup("info/sparse-checkout"); + pl.use_cone_patterns = core_sparse_checkout_cone; if (add_patterns_from_file_to_list(sparse, "", 0, &pl, NULL) < 0) o->skip_sparse_checkout = 1; else -- cgit v1.2.3 From eb42feca974a333e58c2ca0f3cfa8bf0dd421402 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:43 +0000 Subject: unpack-trees: hash less in cone mode The sparse-checkout feature in "cone mode" can use the fact that the recursive patterns are "connected" to the root via parent patterns to decide if a directory is entirely contained in the sparse-checkout or entirely removed. In these cases, we can skip hashing the paths within those directories and simply set the skipworktree bit to the correct value. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- unpack-trees.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index a90d71845d..c0dca20865 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1283,15 +1283,17 @@ static int clear_ce_flags_dir(struct index_state *istate, struct cache_entry **cache_end; int dtype = DT_DIR; int rc; - enum pattern_match_result ret; - ret = path_matches_pattern_list(prefix->buf, prefix->len, - basename, &dtype, pl, istate); + enum pattern_match_result ret, orig_ret; + orig_ret = path_matches_pattern_list(prefix->buf, prefix->len, + basename, &dtype, pl, istate); strbuf_addch(prefix, '/'); /* If undecided, use matching result of parent dir in defval */ - if (ret == UNDECIDED) + if (orig_ret == UNDECIDED) ret = default_match; + else + ret = orig_ret; for (cache_end = cache; cache_end != cache + nr; cache_end++) { struct cache_entry *ce = *cache_end; @@ -1299,17 +1301,23 @@ static int clear_ce_flags_dir(struct index_state *istate, break; } - /* - * TODO: check pl, if there are no patterns that may conflict - * with ret (iow, we know in advance the incl/excl - * decision for the entire directory), clear flag here without - * calling clear_ce_flags_1(). That function will call - * the expensive path_matches_pattern_list() on every entry. - */ - rc = clear_ce_flags_1(istate, cache, cache_end - cache, - prefix, - select_mask, clear_mask, - pl, ret); + if (pl->use_cone_patterns && orig_ret == MATCHED_RECURSIVE) { + struct cache_entry **ce = cache; + rc = (cache_end - cache) / sizeof(struct cache_entry *); + + while (ce < cache_end) { + (*ce)->ce_flags &= ~clear_mask; + ce++; + } + } else if (pl->use_cone_patterns && orig_ret == NOT_MATCHED) { + rc = (cache_end - cache) / sizeof(struct cache_entry *); + } else { + rc = clear_ce_flags_1(istate, cache, cache_end - cache, + prefix, + select_mask, clear_mask, + pl, ret); + } + strbuf_setlen(prefix, prefix->len - 1); return rc; } -- cgit v1.2.3 From 4dcd4def3ca46fc0e8cb28cbfc815a35dd2c3f1d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:44 +0000 Subject: unpack-trees: add progress to clear_ce_flags() When a large repository has many sparse-checkout patterns, the process for updating the skip-worktree bits can take long enough that a user gets confused why nothing is happening. Update the clear_ce_flags() method to write progress. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- unpack-trees.c | 56 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index c0dca20865..8bb684ad62 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1269,7 +1269,8 @@ static int clear_ce_flags_1(struct index_state *istate, struct strbuf *prefix, int select_mask, int clear_mask, struct pattern_list *pl, - enum pattern_match_result default_match); + enum pattern_match_result default_match, + int progress_nr); /* Whole directory matching */ static int clear_ce_flags_dir(struct index_state *istate, @@ -1278,7 +1279,8 @@ static int clear_ce_flags_dir(struct index_state *istate, char *basename, int select_mask, int clear_mask, struct pattern_list *pl, - enum pattern_match_result default_match) + enum pattern_match_result default_match, + int progress_nr) { struct cache_entry **cache_end; int dtype = DT_DIR; @@ -1315,7 +1317,8 @@ static int clear_ce_flags_dir(struct index_state *istate, rc = clear_ce_flags_1(istate, cache, cache_end - cache, prefix, select_mask, clear_mask, - pl, ret); + pl, ret, + progress_nr); } strbuf_setlen(prefix, prefix->len - 1); @@ -1342,7 +1345,8 @@ static int clear_ce_flags_1(struct index_state *istate, struct strbuf *prefix, int select_mask, int clear_mask, struct pattern_list *pl, - enum pattern_match_result default_match) + enum pattern_match_result default_match, + int progress_nr) { struct cache_entry **cache_end = cache + nr; @@ -1356,8 +1360,11 @@ static int clear_ce_flags_1(struct index_state *istate, int len, dtype; enum pattern_match_result ret; + display_progress(istate->progress, progress_nr); + if (select_mask && !(ce->ce_flags & select_mask)) { cache++; + progress_nr++; continue; } @@ -1378,20 +1385,26 @@ static int clear_ce_flags_1(struct index_state *istate, prefix, prefix->buf + prefix->len - len, select_mask, clear_mask, - pl, default_match); + pl, default_match, + progress_nr); /* clear_c_f_dir eats a whole dir already? */ if (processed) { cache += processed; + progress_nr += processed; strbuf_setlen(prefix, prefix->len - len); continue; } strbuf_addch(prefix, '/'); - cache += clear_ce_flags_1(istate, cache, cache_end - cache, - prefix, - select_mask, clear_mask, pl, - default_match); + processed = clear_ce_flags_1(istate, cache, cache_end - cache, + prefix, + select_mask, clear_mask, pl, + default_match, progress_nr); + + cache += processed; + progress_nr += processed; + strbuf_setlen(prefix, prefix->len - len - 1); continue; } @@ -1406,19 +1419,27 @@ static int clear_ce_flags_1(struct index_state *istate, if (ret == MATCHED) ce->ce_flags &= ~clear_mask; cache++; + progress_nr++; } + + display_progress(istate->progress, progress_nr); return nr - (cache_end - cache); } static int clear_ce_flags(struct index_state *istate, int select_mask, int clear_mask, - struct pattern_list *pl) + struct pattern_list *pl, + int show_progress) { static struct strbuf prefix = STRBUF_INIT; char label[100]; int rval; strbuf_reset(&prefix); + if (show_progress) + istate->progress = start_delayed_progress( + _("Updating index flags"), + istate->cache_nr); xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)", (unsigned long)select_mask, (unsigned long)clear_mask); @@ -1428,9 +1449,10 @@ static int clear_ce_flags(struct index_state *istate, istate->cache_nr, &prefix, select_mask, clear_mask, - pl, 0); + pl, 0, 0); trace2_region_leave("unpack_trees", label, the_repository); + stop_progress(&istate->progress); return rval; } @@ -1439,7 +1461,8 @@ static int clear_ce_flags(struct index_state *istate, */ static void mark_new_skip_worktree(struct pattern_list *pl, struct index_state *istate, - int select_flag, int skip_wt_flag) + int select_flag, int skip_wt_flag, + int show_progress) { int i; @@ -1463,7 +1486,7 @@ static void mark_new_skip_worktree(struct pattern_list *pl, * 2. Widen worktree according to sparse-checkout file. * Matched entries will have skip_wt_flag cleared (i.e. "in") */ - clear_ce_flags(istate, select_flag, skip_wt_flag, pl); + clear_ce_flags(istate, select_flag, skip_wt_flag, pl, show_progress); } static int verify_absent(const struct cache_entry *, @@ -1525,7 +1548,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries */ if (!o->skip_sparse_checkout) - mark_new_skip_worktree(o->pl, o->src_index, 0, CE_NEW_SKIP_WORKTREE); + mark_new_skip_worktree(o->pl, o->src_index, 0, + CE_NEW_SKIP_WORKTREE, o->verbose_update); if (!dfc) dfc = xcalloc(1, cache_entry_size(0)); @@ -1590,7 +1614,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options * If the will have NEW_SKIP_WORKTREE, also set CE_SKIP_WORKTREE * so apply_sparse_checkout() won't attempt to remove it from worktree */ - mark_new_skip_worktree(o->pl, &o->result, CE_ADDED, CE_SKIP_WORKTREE | CE_NEW_SKIP_WORKTREE); + mark_new_skip_worktree(o->pl, &o->result, + CE_ADDED, CE_SKIP_WORKTREE | CE_NEW_SKIP_WORKTREE, + o->verbose_update); ret = 0; for (i = 0; i < o->result.cache_nr; i++) { -- cgit v1.2.3 From e091228e17e88b1bc16cb50d5c3aff10dc5119d1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 21 Nov 2019 22:04:46 +0000 Subject: sparse-checkout: update working directory in-process The sparse-checkout builtin used 'git read-tree -mu HEAD' to update the skip-worktree bits in the index and to update the working directory. This extra process is overly complex, and prone to failure. It also requires that we write our changes to the sparse-checkout file before trying to update the index. Remove this extra process call by creating a direct call to unpack_trees() in the same way 'git read-tree -mu HEAD' does. In addition, provide an in-memory list of patterns so we can avoid reading from the sparse-checkout file. This allows us to test a proposed change to the file before writing to it. An earlier version of this patch included a bug when the 'set' command failed due to the "Sparse checkout leaves no entry on working directory" error. It would not rollback the index.lock file, so the replay of the old sparse-checkout specification would fail. A test in t1091 now covers that scenario. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- unpack-trees.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index 8bb684ad62..3789a22cf0 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1511,7 +1511,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options memset(&pl, 0, sizeof(pl)); if (!core_apply_sparse_checkout || !o->update) o->skip_sparse_checkout = 1; - if (!o->skip_sparse_checkout) { + if (!o->skip_sparse_checkout && !o->pl) { char *sparse = git_pathdup("info/sparse-checkout"); pl.use_cone_patterns = core_sparse_checkout_cone; if (add_patterns_from_file_to_list(sparse, "", 0, &pl, NULL) < 0) @@ -1684,7 +1684,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options done: trace_performance_leave("unpack_trees"); - clear_pattern_list(&pl); + if (!o->keep_pattern_list) + clear_pattern_list(&pl); return ret; return_failed: -- cgit v1.2.3 From cc756edda63769cf6d7acc99e6ad3a9cbb5dc3ec Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 9 Sep 2019 13:56:15 +0200 Subject: unpack-trees: let merged_entry() pass through do_add_entry()'s errors A `git clone` will end with exit code 0 when `merged_entry()` returns a positive value during a call of `unpack_trees()` to `traverse_trees()`. The reason is that `unpack_trees()` will interpret a positive value not to be an error. The problem is, however, that `add_index_entry()` (which is called by `merged_entry()` can report an error, and we really should fail the entire clone in such a case. Let's fix this problem, in preparation for a Windows-specific patch disallowing `mkdir()` with directory names that contain a trailing space (which is illegal on NTFS): we want `git clone` to abort when a path cannot be checked out due to that condition. Signed-off-by: Johannes Schindelin --- unpack-trees.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'unpack-trees.c') diff --git a/unpack-trees.c b/unpack-trees.c index 862cfce661..649d11855e 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1821,7 +1821,8 @@ static int merged_entry(const struct cache_entry *ce, invalidate_ce_path(old, o); } - do_add_entry(o, merge, update, CE_STAGEMASK); + if (do_add_entry(o, merge, update, CE_STAGEMASK) < 0) + return -1; return 1; } -- cgit v1.2.3