summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/technical/index-format.txt42
-rw-r--r--cache-tree.c30
-rwxr-xr-xt/t7104-reset-hard.sh2
-rw-r--r--tree-walk.c33
-rw-r--r--unpack-trees.c5
5 files changed, 94 insertions, 18 deletions
diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index 69edf46c03..b633482b1b 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -26,7 +26,7 @@ Git index format
Extensions are identified by signature. Optional extensions can
be ignored if Git does not understand them.
- Git currently supports cached tree and resolve undo extensions.
+ Git currently supports cache tree and resolve undo extensions.
4-byte extension signature. If the first byte is 'A'..'Z' the
extension is optional and can be ignored.
@@ -136,14 +136,35 @@ Git index format
== Extensions
-=== Cached tree
-
- Cached tree extension contains pre-computed hashes for trees that can
- be derived from the index. It helps speed up tree object generation
- from index for a new commit.
-
- When a path is updated in index, the path must be invalidated and
- removed from tree cache.
+=== Cache tree
+
+ Since the index does not record entries for directories, the cache
+ entries cannot describe tree objects that already exist in the object
+ database for regions of the index that are unchanged from an existing
+ commit. The cache tree extension stores a recursive tree structure that
+ describes the trees that already exist and completely match sections of
+ the cache entries. This speeds up tree object generation from the index
+ for a new commit by only computing the trees that are "new" to that
+ commit. It also assists when comparing the index to another tree, such
+ as `HEAD^{tree}`, since sections of the index can be skipped when a tree
+ comparison demonstrates equality.
+
+ The recursive tree structure uses nodes that store a number of cache
+ entries, a list of subnodes, and an object ID (OID). The OID references
+ the existing tree for that node, if it is known to exist. The subnodes
+ correspond to subdirectories that themselves have cache tree nodes. The
+ number of cache entries corresponds to the number of cache entries in
+ the index that describe paths within that tree's directory.
+
+ The extension tracks the full directory structure in the cache tree
+ extension, but this is generally smaller than the full cache entry list.
+
+ When a path is updated in index, Git invalidates all nodes of the
+ recursive cache tree corresponding to the parent directories of that
+ path. We store these tree nodes as being "invalid" by using "-1" as the
+ number of cache entries. Invalid nodes still store a span of index
+ entries, allowing Git to focus its efforts when reconstructing a full
+ cache tree.
The signature for this extension is { 'T', 'R', 'E', 'E' }.
@@ -174,7 +195,8 @@ Git index format
first entry represents the root level of the repository, followed by the
first subtree--let's call this A--of the root level (with its name
relative to the root level), followed by the first subtree of A (with
- its name relative to A), ...
+ its name relative to A), and so on. The specified number of subtrees
+ indicates when the current level of the recursive stack is complete.
=== Resolve undo
diff --git a/cache-tree.c b/cache-tree.c
index a537a806c1..3f1a8d4f1b 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -185,12 +185,14 @@ static int verify_cache(struct cache_entry **cache,
* the cache is sorted. Also path can appear only once,
* which means conflicting one would immediately follow.
*/
- const char *this_name = cache[i]->name;
- const char *next_name = cache[i+1]->name;
- int this_len = strlen(this_name);
- if (this_len < strlen(next_name) &&
- strncmp(this_name, next_name, this_len) == 0 &&
- next_name[this_len] == '/') {
+ const struct cache_entry *this_ce = cache[i];
+ const struct cache_entry *next_ce = cache[i + 1];
+ const char *this_name = this_ce->name;
+ const char *next_name = next_ce->name;
+ int this_len = ce_namelen(this_ce);
+ if (this_len < ce_namelen(next_ce) &&
+ next_name[this_len] == '/' &&
+ strncmp(this_name, next_name, this_len) == 0) {
if (10 < ++funny) {
fprintf(stderr, "...\n");
break;
@@ -442,7 +444,9 @@ int cache_tree_update(struct index_state *istate, int flags)
if (i)
return i;
trace_performance_enter();
+ trace2_region_enter("cache_tree", "update", the_repository);
i = update_one(it, cache, entries, "", 0, &skip, flags);
+ trace2_region_leave("cache_tree", "update", the_repository);
trace_performance_leave("cache_tree_update");
if (i < 0)
return i;
@@ -492,7 +496,9 @@ static void write_one(struct strbuf *buffer, struct cache_tree *it,
void cache_tree_write(struct strbuf *sb, struct cache_tree *root)
{
+ trace2_region_enter("cache_tree", "write", the_repository);
write_one(sb, root, "", 0);
+ trace2_region_leave("cache_tree", "write", the_repository);
}
static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
@@ -581,9 +587,16 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
struct cache_tree *cache_tree_read(const char *buffer, unsigned long size)
{
+ struct cache_tree *result;
+
if (buffer[0])
return NULL; /* not the whole tree */
- return read_one(&buffer, &size);
+
+ trace2_region_enter("cache_tree", "read", the_repository);
+ result = read_one(&buffer, &size);
+ trace2_region_leave("cache_tree", "read", the_repository);
+
+ return result;
}
static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path)
@@ -733,10 +746,13 @@ void prime_cache_tree(struct repository *r,
struct index_state *istate,
struct tree *tree)
{
+ trace2_region_enter("cache-tree", "prime_cache_tree", the_repository);
cache_tree_free(&istate->cache_tree);
istate->cache_tree = cache_tree();
+
prime_cache_tree_rec(r, istate->cache_tree, tree);
istate->cache_changed |= CACHE_TREE_CHANGED;
+ trace2_region_leave("cache-tree", "prime_cache_tree", the_repository);
}
/*
diff --git a/t/t7104-reset-hard.sh b/t/t7104-reset-hard.sh
index 16faa07813..7948ec392b 100755
--- a/t/t7104-reset-hard.sh
+++ b/t/t7104-reset-hard.sh
@@ -33,7 +33,7 @@ test_expect_success 'reset --hard should restore unmerged ones' '
'
-test_expect_success 'reset --hard did not corrupt index or cached-tree' '
+test_expect_success 'reset --hard did not corrupt index or cache-tree' '
T=$(git write-tree) &&
rm -f .git/index &&
diff --git a/tree-walk.c b/tree-walk.c
index 0160294712..2d6226d5f1 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -4,6 +4,7 @@
#include "object-store.h"
#include "tree.h"
#include "pathspec.h"
+#include "json-writer.h"
static const char *get_mode(const char *str, unsigned int *modep)
{
@@ -167,6 +168,25 @@ int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
return 1;
}
+static int traverse_trees_atexit_registered;
+static int traverse_trees_count;
+static int traverse_trees_cur_depth;
+static int traverse_trees_max_depth;
+
+static void trace2_traverse_trees_statistics_atexit(void)
+{
+ struct json_writer jw = JSON_WRITER_INIT;
+
+ jw_object_begin(&jw, 0);
+ jw_object_intmax(&jw, "traverse_trees_count", traverse_trees_count);
+ jw_object_intmax(&jw, "traverse_trees_max_depth", traverse_trees_max_depth);
+ jw_end(&jw);
+
+ trace2_data_json("traverse_trees", the_repository, "statistics", &jw);
+
+ jw_release(&jw);
+}
+
void setup_traverse_info(struct traverse_info *info, const char *base)
{
size_t pathlen = strlen(base);
@@ -180,6 +200,11 @@ void setup_traverse_info(struct traverse_info *info, const char *base)
info->namelen = pathlen;
if (pathlen)
info->prev = &dummy;
+
+ if (trace2_is_enabled() && !traverse_trees_atexit_registered) {
+ atexit(trace2_traverse_trees_statistics_atexit);
+ traverse_trees_atexit_registered = 1;
+ }
}
char *make_traverse_path(char *path, size_t pathlen,
@@ -416,6 +441,12 @@ int traverse_trees(struct index_state *istate,
int interesting = 1;
char *traverse_path;
+ traverse_trees_count++;
+ traverse_trees_cur_depth++;
+
+ if (traverse_trees_cur_depth > traverse_trees_max_depth)
+ traverse_trees_max_depth = traverse_trees_cur_depth;
+
if (n >= ARRAY_SIZE(entry))
BUG("traverse_trees() called with too many trees (%d)", n);
@@ -515,6 +546,8 @@ int traverse_trees(struct index_state *istate,
free(traverse_path);
info->traverse_path = NULL;
strbuf_release(&base);
+
+ traverse_trees_cur_depth--;
return error;
}
diff --git a/unpack-trees.c b/unpack-trees.c
index 323280dd48..af6e9b9c2f 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1580,6 +1580,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
trace_performance_enter();
+ trace2_region_enter("unpack_trees", "unpack_trees", the_repository);
+
if (!core_apply_sparse_checkout || !o->update)
o->skip_sparse_checkout = 1;
if (!o->skip_sparse_checkout && !o->pl) {
@@ -1653,7 +1655,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
}
trace_performance_enter();
+ trace2_region_enter("unpack_trees", "traverse_trees", the_repository);
ret = traverse_trees(o->src_index, len, t, &info);
+ trace2_region_leave("unpack_trees", "traverse_trees", the_repository);
trace_performance_leave("traverse_trees");
if (ret < 0)
goto return_failed;
@@ -1741,6 +1745,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
done:
if (free_pattern_list)
clear_pattern_list(&pl);
+ trace2_region_leave("unpack_trees", "unpack_trees", the_repository);
trace_performance_leave("unpack_trees");
return ret;