diff options
-rw-r--r-- | Documentation/technical/index-format.txt | 42 | ||||
-rw-r--r-- | cache-tree.c | 30 | ||||
-rwxr-xr-x | t/t7104-reset-hard.sh | 2 | ||||
-rw-r--r-- | tree-walk.c | 33 | ||||
-rw-r--r-- | unpack-trees.c | 5 |
5 files changed, 94 insertions, 18 deletions
diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index 69edf46c03..b633482b1b 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -26,7 +26,7 @@ Git index format Extensions are identified by signature. Optional extensions can be ignored if Git does not understand them. - Git currently supports cached tree and resolve undo extensions. + Git currently supports cache tree and resolve undo extensions. 4-byte extension signature. If the first byte is 'A'..'Z' the extension is optional and can be ignored. @@ -136,14 +136,35 @@ Git index format == Extensions -=== Cached tree - - Cached tree extension contains pre-computed hashes for trees that can - be derived from the index. It helps speed up tree object generation - from index for a new commit. - - When a path is updated in index, the path must be invalidated and - removed from tree cache. +=== Cache tree + + Since the index does not record entries for directories, the cache + entries cannot describe tree objects that already exist in the object + database for regions of the index that are unchanged from an existing + commit. The cache tree extension stores a recursive tree structure that + describes the trees that already exist and completely match sections of + the cache entries. This speeds up tree object generation from the index + for a new commit by only computing the trees that are "new" to that + commit. It also assists when comparing the index to another tree, such + as `HEAD^{tree}`, since sections of the index can be skipped when a tree + comparison demonstrates equality. + + The recursive tree structure uses nodes that store a number of cache + entries, a list of subnodes, and an object ID (OID). The OID references + the existing tree for that node, if it is known to exist. The subnodes + correspond to subdirectories that themselves have cache tree nodes. The + number of cache entries corresponds to the number of cache entries in + the index that describe paths within that tree's directory. + + The extension tracks the full directory structure in the cache tree + extension, but this is generally smaller than the full cache entry list. + + When a path is updated in index, Git invalidates all nodes of the + recursive cache tree corresponding to the parent directories of that + path. We store these tree nodes as being "invalid" by using "-1" as the + number of cache entries. Invalid nodes still store a span of index + entries, allowing Git to focus its efforts when reconstructing a full + cache tree. The signature for this extension is { 'T', 'R', 'E', 'E' }. @@ -174,7 +195,8 @@ Git index format first entry represents the root level of the repository, followed by the first subtree--let's call this A--of the root level (with its name relative to the root level), followed by the first subtree of A (with - its name relative to A), ... + its name relative to A), and so on. The specified number of subtrees + indicates when the current level of the recursive stack is complete. === Resolve undo diff --git a/cache-tree.c b/cache-tree.c index a537a806c1..3f1a8d4f1b 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -185,12 +185,14 @@ static int verify_cache(struct cache_entry **cache, * the cache is sorted. Also path can appear only once, * which means conflicting one would immediately follow. */ - const char *this_name = cache[i]->name; - const char *next_name = cache[i+1]->name; - int this_len = strlen(this_name); - if (this_len < strlen(next_name) && - strncmp(this_name, next_name, this_len) == 0 && - next_name[this_len] == '/') { + const struct cache_entry *this_ce = cache[i]; + const struct cache_entry *next_ce = cache[i + 1]; + const char *this_name = this_ce->name; + const char *next_name = next_ce->name; + int this_len = ce_namelen(this_ce); + if (this_len < ce_namelen(next_ce) && + next_name[this_len] == '/' && + strncmp(this_name, next_name, this_len) == 0) { if (10 < ++funny) { fprintf(stderr, "...\n"); break; @@ -442,7 +444,9 @@ int cache_tree_update(struct index_state *istate, int flags) if (i) return i; trace_performance_enter(); + trace2_region_enter("cache_tree", "update", the_repository); i = update_one(it, cache, entries, "", 0, &skip, flags); + trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) return i; @@ -492,7 +496,9 @@ static void write_one(struct strbuf *buffer, struct cache_tree *it, void cache_tree_write(struct strbuf *sb, struct cache_tree *root) { + trace2_region_enter("cache_tree", "write", the_repository); write_one(sb, root, "", 0); + trace2_region_leave("cache_tree", "write", the_repository); } static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) @@ -581,9 +587,16 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) struct cache_tree *cache_tree_read(const char *buffer, unsigned long size) { + struct cache_tree *result; + if (buffer[0]) return NULL; /* not the whole tree */ - return read_one(&buffer, &size); + + trace2_region_enter("cache_tree", "read", the_repository); + result = read_one(&buffer, &size); + trace2_region_leave("cache_tree", "read", the_repository); + + return result; } static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path) @@ -733,10 +746,13 @@ void prime_cache_tree(struct repository *r, struct index_state *istate, struct tree *tree) { + trace2_region_enter("cache-tree", "prime_cache_tree", the_repository); cache_tree_free(&istate->cache_tree); istate->cache_tree = cache_tree(); + prime_cache_tree_rec(r, istate->cache_tree, tree); istate->cache_changed |= CACHE_TREE_CHANGED; + trace2_region_leave("cache-tree", "prime_cache_tree", the_repository); } /* diff --git a/t/t7104-reset-hard.sh b/t/t7104-reset-hard.sh index 16faa07813..7948ec392b 100755 --- a/t/t7104-reset-hard.sh +++ b/t/t7104-reset-hard.sh @@ -33,7 +33,7 @@ test_expect_success 'reset --hard should restore unmerged ones' ' ' -test_expect_success 'reset --hard did not corrupt index or cached-tree' ' +test_expect_success 'reset --hard did not corrupt index or cache-tree' ' T=$(git write-tree) && rm -f .git/index && diff --git a/tree-walk.c b/tree-walk.c index 0160294712..2d6226d5f1 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -4,6 +4,7 @@ #include "object-store.h" #include "tree.h" #include "pathspec.h" +#include "json-writer.h" static const char *get_mode(const char *str, unsigned int *modep) { @@ -167,6 +168,25 @@ int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry) return 1; } +static int traverse_trees_atexit_registered; +static int traverse_trees_count; +static int traverse_trees_cur_depth; +static int traverse_trees_max_depth; + +static void trace2_traverse_trees_statistics_atexit(void) +{ + struct json_writer jw = JSON_WRITER_INIT; + + jw_object_begin(&jw, 0); + jw_object_intmax(&jw, "traverse_trees_count", traverse_trees_count); + jw_object_intmax(&jw, "traverse_trees_max_depth", traverse_trees_max_depth); + jw_end(&jw); + + trace2_data_json("traverse_trees", the_repository, "statistics", &jw); + + jw_release(&jw); +} + void setup_traverse_info(struct traverse_info *info, const char *base) { size_t pathlen = strlen(base); @@ -180,6 +200,11 @@ void setup_traverse_info(struct traverse_info *info, const char *base) info->namelen = pathlen; if (pathlen) info->prev = &dummy; + + if (trace2_is_enabled() && !traverse_trees_atexit_registered) { + atexit(trace2_traverse_trees_statistics_atexit); + traverse_trees_atexit_registered = 1; + } } char *make_traverse_path(char *path, size_t pathlen, @@ -416,6 +441,12 @@ int traverse_trees(struct index_state *istate, int interesting = 1; char *traverse_path; + traverse_trees_count++; + traverse_trees_cur_depth++; + + if (traverse_trees_cur_depth > traverse_trees_max_depth) + traverse_trees_max_depth = traverse_trees_cur_depth; + if (n >= ARRAY_SIZE(entry)) BUG("traverse_trees() called with too many trees (%d)", n); @@ -515,6 +546,8 @@ int traverse_trees(struct index_state *istate, free(traverse_path); info->traverse_path = NULL; strbuf_release(&base); + + traverse_trees_cur_depth--; return error; } diff --git a/unpack-trees.c b/unpack-trees.c index 323280dd48..af6e9b9c2f 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1580,6 +1580,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES); trace_performance_enter(); + trace2_region_enter("unpack_trees", "unpack_trees", the_repository); + if (!core_apply_sparse_checkout || !o->update) o->skip_sparse_checkout = 1; if (!o->skip_sparse_checkout && !o->pl) { @@ -1653,7 +1655,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options } trace_performance_enter(); + trace2_region_enter("unpack_trees", "traverse_trees", the_repository); ret = traverse_trees(o->src_index, len, t, &info); + trace2_region_leave("unpack_trees", "traverse_trees", the_repository); trace_performance_leave("traverse_trees"); if (ret < 0) goto return_failed; @@ -1741,6 +1745,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options done: if (free_pattern_list) clear_pattern_list(&pl); + trace2_region_leave("unpack_trees", "unpack_trees", the_repository); trace_performance_leave("unpack_trees"); return ret; |