From c3a9cecc7f5ea8b36d6943b868122ca0bfbc0f9c Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 17 Mar 2022 15:55:34 +0000 Subject: t1092: add sparse directory before cone in test repo Add a sparse directory 'before/' containing files 'a' and 'b' to the test repo used in 't/t1092-sparse-checkout-compatibility.sh'. This is meant to ensure that no sparse index integrations rely on the in-cone path(s) being lexicographically first in the repo. Unfortunately, some existing tests do not handle this repo architecture properly: * 'add outside sparse cone' * 'status/add: outside sparse cone' * 'reset with pathspecs inside sparse definition' All three of these are due to the incorrect handling of the 'unpack_trees_options.cache_bottom' when performing a cache diff via 'unpack_trees'. This will be corrected in a future patch; in the meantime, mark the tests with 'test_expect_failure'. Finally, update the 'ls-files' and 'root directory cannot be sparse' tests to include the 'before/' directory in their expected index contents. 
Co-authored-by: Derrick Stolee Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index dcc0a30d4a..dcd7061fb3 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -16,7 +16,9 @@ test_expect_success 'setup' ' echo "after deep" >e && echo "after folder1" >g && echo "after x" >z && - mkdir folder1 folder2 deep x && + mkdir folder1 folder2 deep before x && + echo "before deep" >before/a && + echo "before deep again" >before/b && mkdir deep/deeper1 deep/deeper2 deep/before deep/later && mkdir deep/deeper1/deepest && mkdir deep/deeper1/deepest2 && @@ -254,6 +256,7 @@ test_expect_success 'root directory cannot be sparse' ' # Verify sparse directories still present, root directory is not sparse cat >expect <<-EOF && + before/ folder1/ folder2/ x/ @@ -337,7 +340,7 @@ test_expect_success 'deep changes during checkout' ' test_all_match git checkout base ' -test_expect_success 'add outside sparse cone' ' +test_expect_failure 'add outside sparse cone' ' init_repos && run_on_sparse mkdir folder1 && @@ -379,7 +382,7 @@ test_expect_success 'commit including unstaged changes' ' test_all_match git status --porcelain=v2 ' -test_expect_success 'status/add: outside sparse cone' ' +test_expect_failure 'status/add: outside sparse cone' ' init_repos && # folder1 is at HEAD, but outside the sparse cone @@ -590,7 +593,7 @@ test_expect_success 'checkout and reset (keep)' ' test_all_match test_must_fail git reset --keep deepest ' -test_expect_success 'reset with pathspecs inside sparse definition' ' +test_expect_failure 'reset with pathspecs inside sparse definition' ' init_repos && write_script edit-contents <<-\EOF && @@ -1444,6 +1447,7 @@ test_expect_success 'ls-files' ' cat >expect <<-\EOF && a + before/ deep/ e folder1- @@ 
-1491,6 +1495,7 @@ test_expect_success 'ls-files' ' cat >expect <<-\EOF && a + before/ deep/ e folder1- -- cgit v1.2.3 From bfc763df773ccfc10eb38a24caa111d0fcbc5493 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 17 Mar 2022 15:55:35 +0000 Subject: unpack-trees: increment cache_bottom for sparse directories Correct tracking of the 'cache_bottom' for cases where sparse directories are present in the index. BACKGROUND ---------- The 'unpack_trees_options.cache_bottom' is a variable that tracks the in-progress "bottom" of the cache as 'unpack_trees()' iterates through the contents of the index. Most importantly, this value informs the sequential return values of 'next_cache_entry()' which, in the "diff cache" usage of 'unpack_callback()', are either unpacked as-is or are passed into the diff machinery. The 'cache_bottom' is intended to track the position of the first entry in the index that has not yet been diffed or unpacked. It is advanced in two main ways: either it is incremented when an index entry is marked as "used" (in 'mark_ce_used()'), indicating that it was unpacked or diffed, or when a directory is unpacked, in which case it is increased by an amount equaling the number of index entries inside that tree. In 17a1bb570b (unpack-trees: preserve cache_bottom, 2021-07-14), it was identified that sparse directories posed a problem to the above 'cache_bottom' advancement logic - because a sparse directory was both an index entry that could be "used" and a directory that can be unpacked, the 'cache_bottom' would be incremented too many times. To solve this problem, the 'mark_ce_used()' advancement of 'cache_bottom' was skipped for sparse directories. INCORRECT CACHE_BOTTOM TRACKING ------------------------------- Skipping the 'cache_bottom' advancement for sparse directories in 'mark_ce_used()' breaks down in two cases: 1. 
When the 'unpack_trees()' operation is *not* a "cache diff" (because the directory contents-based incrementing of 'cache_bottom' does not happen). 2. When a cache diff is performed with a pathspec (because 'unpack_index_entry()' will unpack a sparse directory not matched by the pathspec without performing the directory contents-based increment). The former luckily does not appear to affect 'git' behavior, likely because 'cache_bottom' is largely unused (non-"cache diff" 'unpack_trees()' uses 'find_index_entry()' - rather than 'next_cache_entry()' - to find the index entries to unpack). The latter, however, causes 'cache_bottom' to "lag behind" its intended position by an amount equal to the number of sparse directories unpacked so far with 'unpack_index_entry()'. If a repository is structured such that any sparse directories are ordered lexicographically *after* any pathspec-matching directories, though, this issue won't present any adverse behavior. This was the case with the 't1092-sparse-checkout-compatibility.sh' tests before the addition of the 'before/' sparse directory (ordered *before* the in-cone 'deep/' directory), therefore sidestepping the issue. Once the 'before/' directory was added, though, 'cache_bottom' began to lag behind its intended position, causing 'next_cache_entry()' to return index entries it had already processed and, ultimately, an incorrect diff. CORRECTING CACHE_BOTTOM ----------------------- The problems observed in 't1092' come from 'cache_bottom' lagging behind in cases where the cache tree-based advancement doesn't occur. To solve this, then, the fix in 17a1bb570b is "reversed"; rather than skipping 'cache_bottom' advancement in 'mark_ce_used()', we skip the directory contents-based advancement for sparse directories. 
Now, every index entry can be accounted for in 'cache_bottom': * if you're working with a single index entry, 'cache_bottom' is incremented in 'mark_ce_used()' * if you're working with a directory that contains index entries (but is not one itself), 'cache_bottom' is incremented by the number of entries in that directory. Finally, change the 'test_expect_failure' tests in 't1092' failing due to this bug back to 'test_expect_success'. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 6 +++--- unpack-trees.c | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index dcd7061fb3..236ab53028 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -340,7 +340,7 @@ test_expect_success 'deep changes during checkout' ' test_all_match git checkout base ' -test_expect_failure 'add outside sparse cone' ' +test_expect_success 'add outside sparse cone' ' init_repos && run_on_sparse mkdir folder1 && @@ -382,7 +382,7 @@ test_expect_success 'commit including unstaged changes' ' test_all_match git status --porcelain=v2 ' -test_expect_failure 'status/add: outside sparse cone' ' +test_expect_success 'status/add: outside sparse cone' ' init_repos && # folder1 is at HEAD, but outside the sparse cone @@ -593,7 +593,7 @@ test_expect_success 'checkout and reset (keep)' ' test_all_match test_must_fail git reset --keep deepest ' -test_expect_failure 'reset with pathspecs inside sparse definition' ' +test_expect_success 'reset with pathspecs inside sparse definition' ' init_repos && write_script edit-contents <<-\EOF && diff --git a/unpack-trees.c b/unpack-trees.c index 2763a029a1..b82c1a9705 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -595,13 +595,6 @@ static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o) { ce->ce_flags |= CE_UNPACKED; - /* 
- * If this is a sparse directory, don't advance cache_bottom. - * That will be advanced later using the cache-tree data. - */ - if (S_ISSPARSEDIR(ce->ce_mode)) - return; - if (o->cache_bottom < o->src_index->cache_nr && o->src_index->cache[o->cache_bottom] == ce) { int bottom = o->cache_bottom; @@ -1478,7 +1471,14 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str * it does not do any look-ahead, so this is safe. */ if (matches) { - o->cache_bottom += matches; + /* + * Only increment the cache_bottom if the + * directory isn't a sparse directory index + * entry (if it is, it was already incremented) + * in 'mark_ce_used()' + */ + if (!src[0] || !S_ISSPARSEDIR(src[0]->ce_mode)) + o->cache_bottom += matches; return mask; } } -- cgit v1.2.3 From 99430aa12cddf0af85d24316cea4a02dc4a711b6 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 17 Mar 2022 15:55:36 +0000 Subject: Revert "unpack-trees: improve performance of next_cache_entry" This reverts commit f2a454e0a5 (unpack-trees: improve performance of next_cache_entry, 2021-11-29). The "hint" value was originally needed to improve performance in 'git reset -- <pathspec>' caused by 'cache_bottom' lagging behind its correct value when using a sparse index. The 'cache_bottom' tracking has since been corrected, removing the need for an additional "pseudo-cache_bottom" tracking variable. 
Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- unpack-trees.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/unpack-trees.c b/unpack-trees.c index b82c1a9705..7f528d35cc 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -644,24 +644,17 @@ static void mark_ce_used_same_name(struct cache_entry *ce, } } -static struct cache_entry *next_cache_entry(struct unpack_trees_options *o, int *hint) +static struct cache_entry *next_cache_entry(struct unpack_trees_options *o) { const struct index_state *index = o->src_index; int pos = o->cache_bottom; - if (*hint > pos) - pos = *hint; - while (pos < index->cache_nr) { struct cache_entry *ce = index->cache[pos]; - if (!(ce->ce_flags & CE_UNPACKED)) { - *hint = pos + 1; + if (!(ce->ce_flags & CE_UNPACKED)) return ce; - } pos++; } - - *hint = pos; return NULL; } @@ -1409,13 +1402,12 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str /* Are we supposed to look at the index too? */ if (o->merge) { - int hint = -1; while (1) { int cmp; struct cache_entry *ce; if (o->diff_index_cached) - ce = next_cache_entry(o, &hint); + ce = next_cache_entry(o); else ce = find_cache_entry(info, p); @@ -1777,7 +1769,7 @@ static int verify_absent(const struct cache_entry *, int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options *o) { struct repository *repo = the_repository; - int i, hint, ret; + int i, ret; static struct cache_entry *dfc; struct pattern_list pl; int free_pattern_list = 0; @@ -1869,15 +1861,13 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options info.pathspec = o->pathspec; if (o->prefix) { - hint = -1; - /* * Unpack existing index entries that sort before the * prefix the tree is spliced into. Note that o->merge * is always true in this case. 
*/ while (1) { - struct cache_entry *ce = next_cache_entry(o, &hint); + struct cache_entry *ce = next_cache_entry(o); if (!ce) break; if (ce_in_traverse_path(ce, &info)) @@ -1898,9 +1888,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options /* Any left-over entries in the index? */ if (o->merge) { - hint = -1; while (1) { - struct cache_entry *ce = next_cache_entry(o, &hint); + struct cache_entry *ce = next_cache_entry(o); if (!ce) break; if (unpack_index_entry(ce, o) < 0) -- cgit v1.2.3