diff options
author | Junio C Hamano <gitster@pobox.com> | 2020-07-30 13:20:30 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2020-07-30 13:20:30 -0700 |
commit | de6dda0dc3d95cd9aaf43a7b85ceeb57316dcc27 (patch) | |
tree | 8c970a139cd81136e71e6db49807b55878b6d54e | |
parent | Git 2.28 (diff) | |
parent | commit-graph: simplify write_commit_graph_file() #2 (diff) | |
download | tgif-de6dda0dc3d95cd9aaf43a7b85ceeb57316dcc27.tar.xz |
Merge branch 'sg/commit-graph-cleanups' into master
The changed-path Bloom filter is improved using ideas from an
independent implementation.
* sg/commit-graph-cleanups:
commit-graph: simplify write_commit_graph_file() #2
commit-graph: simplify write_commit_graph_file() #1
commit-graph: simplify parse_commit_graph() #2
commit-graph: simplify parse_commit_graph() #1
commit-graph: clean up #includes
diff.h: drop diff_tree_oid() & friends' return value
commit-slab: add a function to deep free entries on the slab
commit-graph-format.txt: all multi-byte numbers are in network byte order
commit-graph: fix parsing the Chunk Lookup table
tree-walk.c: don't match submodule entries for 'submod/anything'
-rw-r--r-- | Documentation/technical/commit-graph-format.txt | 2 | ||||
-rw-r--r-- | commit-graph.c | 112 | ||||
-rw-r--r-- | commit-graph.h | 6 | ||||
-rw-r--r-- | commit-slab-decl.h | 1 | ||||
-rw-r--r-- | commit-slab-impl.h | 13 | ||||
-rw-r--r-- | commit-slab.h | 10 | ||||
-rw-r--r-- | diff.h | 10 | ||||
-rw-r--r-- | revision.c | 9 | ||||
-rw-r--r-- | shallow.c | 14 | ||||
-rwxr-xr-x | t/t4010-diff-pathspec.sh | 4 | ||||
-rwxr-xr-x | t/t5318-commit-graph.sh | 5 | ||||
-rw-r--r-- | tree-diff.c | 30 | ||||
-rw-r--r-- | tree-walk.c | 9 |
13 files changed, 117 insertions, 108 deletions
diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 1beef17182..440541045d 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning of the body. The header includes certain values, such as number of chunks and hash type. -All 4-byte numbers are in network order. +All multi-byte numbers are in network byte order. HEADER: diff --git a/commit-graph.c b/commit-graph.c index 1af68c297d..1f37097dff 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1,7 +1,5 @@ -#include "cache.h" -#include "config.h" -#include "dir.h" #include "git-compat-util.h" +#include "config.h" #include "lockfile.h" #include "pack.h" #include "packfile.h" @@ -285,8 +283,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) const unsigned char *data, *chunk_lookup; uint32_t i; struct commit_graph *graph; - uint64_t last_chunk_offset; - uint32_t last_chunk_id; + uint64_t next_chunk_offset; uint32_t graph_signature; unsigned char graph_version, hash_version; @@ -326,24 +323,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) graph->data = graph_map; graph->data_len = graph_size; - last_chunk_id = 0; - last_chunk_offset = 8; + if (graph_size < GRAPH_HEADER_SIZE + + (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH + + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) { + error(_("commit-graph file is too small to hold %u chunks"), + graph->num_chunks); + free(graph); + return NULL; + } + chunk_lookup = data + 8; + next_chunk_offset = get_be64(chunk_lookup + 4); for (i = 0; i < graph->num_chunks; i++) { uint32_t chunk_id; - uint64_t chunk_offset; + uint64_t chunk_offset = next_chunk_offset; int chunk_repeated = 0; - if (data + graph_size - chunk_lookup < - GRAPH_CHUNKLOOKUP_WIDTH) { - error(_("commit-graph chunk lookup table entry missing; file may be incomplete")); - goto free_and_return; - } - chunk_id = get_be32(chunk_lookup + 0); - chunk_offset = get_be64(chunk_lookup + 4); chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; + next_chunk_offset = get_be64(chunk_lookup + 4); if (chunk_offset > graph_size - the_hash_algo->rawsz) { error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), @@ -362,8 +361,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) case GRAPH_CHUNKID_OIDLOOKUP: if (graph->chunk_oid_lookup) chunk_repeated = 1; - else + else { graph->chunk_oid_lookup = data + chunk_offset; + graph->num_commits = (next_chunk_offset - chunk_offset) + / graph->hash_len; + } break; case GRAPH_CHUNKID_DATA: @@ -417,15 +419,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) error(_("commit-graph chunk id %08x appears multiple times"), chunk_id); goto free_and_return; } - - if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP) - { - graph->num_commits = (chunk_offset - last_chunk_offset) - / graph->hash_len; - } - - last_chunk_id = chunk_id; - last_chunk_offset = chunk_offset; } if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { @@ -1586,17 +1579,22 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } +struct chunk_info { + uint32_t id; + uint64_t size; +}; + static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; int fd; struct hashfile *f; struct lock_file lk = LOCK_INIT; - uint32_t chunk_ids[MAX_NUM_CHUNKS + 1]; - uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1]; + struct chunk_info chunks[MAX_NUM_CHUNKS + 1]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int num_chunks = 3; + uint64_t chunk_offset; struct object_id file_hash; const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; @@ -1644,51 +1642,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); } - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; - chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; - chunk_ids[2] = GRAPH_CHUNKID_DATA; + chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; + chunks[0].size = GRAPH_FANOUT_SIZE; + chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; + chunks[1].size = hashsz * ctx->commits.nr; + chunks[2].id = GRAPH_CHUNKID_DATA; + chunks[2].size = (hashsz + 16) * ctx->commits.nr; if (ctx->num_extra_edges) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; + chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; + chunks[num_chunks].size = 4 * ctx->num_extra_edges; num_chunks++; } if (ctx->changed_paths) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES; + chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; + chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; num_chunks++; - chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA; + chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; + chunks[num_chunks].size = sizeof(uint32_t) * 3 + + ctx->total_bloom_filter_data_size; num_chunks++; } if (ctx->num_commit_graphs_after > 1) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; + chunks[num_chunks].id = GRAPH_CHUNKID_BASE; + chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); num_chunks++; } - chunk_ids[num_chunks] = 0; - - chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; - chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; - chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; - chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - - num_chunks = 3; - if (ctx->num_extra_edges) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - 4 * ctx->num_extra_edges; - num_chunks++; - } - if (ctx->changed_paths) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - sizeof(uint32_t) * ctx->commits.nr; - num_chunks++; - - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size; - num_chunks++; - } - if (ctx->num_commit_graphs_after > 1) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - hashsz * (ctx->num_commit_graphs_after - 1); - num_chunks++; - } + chunks[num_chunks].id = 0; + chunks[num_chunks].size = 0; hashwrite_be32(f, GRAPH_SIGNATURE); @@ -1697,13 +1678,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) hashwrite_u8(f, num_chunks); hashwrite_u8(f, ctx->num_commit_graphs_after - 1); + chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; - chunk_write[0] = htonl(chunk_ids[i]); - chunk_write[1] = htonl(chunk_offsets[i] >> 32); - chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); + chunk_write[0] = htonl(chunks[i].id); + chunk_write[1] = htonl(chunk_offset >> 32); + chunk_write[2] = htonl(chunk_offset & 0xffffffff); hashwrite(f, chunk_write, 12); + + chunk_offset += chunks[i].size; } if (ctx->report_progress) { diff --git a/commit-graph.h b/commit-graph.h index 28f89cdf3e..cf8d3c9647 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -2,9 +2,6 @@ #define COMMIT_GRAPH_H #include "git-compat-util.h" -#include "repository.h" -#include "string-list.h" -#include "cache.h" #include "object-store.h" #include "oidset.h" @@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void); struct commit; struct bloom_filter_settings; +struct repository; +struct raw_object_store; +struct string_list; char *get_commit_graph_filename(struct object_directory *odb); int open_commit_graph(const char *graph_file, int *fd, struct stat *st); diff --git a/commit-slab-decl.h b/commit-slab-decl.h index bfbed1516a..98de2c970c 100644 --- a/commit-slab-decl.h +++ b/commit-slab-decl.h @@ -32,6 +32,7 @@ struct slabname { \ void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \ void init_ ##slabname(struct slabname *s); \ void clear_ ##slabname(struct slabname *s); \ +void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \ elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \ elemtype *slabname## _at(struct slabname *s, const struct commit *c); \ elemtype *slabname## _peek(struct slabname *s, const struct commit *c) diff --git a/commit-slab-impl.h b/commit-slab-impl.h index 5c0eb91a5d..557738df27 100644 --- a/commit-slab-impl.h +++ b/commit-slab-impl.h @@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \ FREE_AND_NULL(s->slab); \ } \ \ +scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \ +{ \ + unsigned int i; \ + for (i = 0; i < s->slab_count; i++) { \ + unsigned int j; \ + if (!s->slab[i]) \ + continue; \ + for (j = 0; j < s->slab_size; j++) \ + free_fn(&s->slab[i][j * s->stride]); \ + } \ + clear_ ##slabname(s); \ +} \ + \ scope elemtype *slabname## _at_peek(struct slabname *s, \ const struct commit *c, \ int add_if_missing) \ diff --git a/commit-slab.h b/commit-slab.h index 05b3f2804e..8e72a30536 100644 --- a/commit-slab.h +++ b/commit-slab.h @@ -47,6 +47,16 @@ * * Call this function before the slab falls out of scope to avoid * leaking memory. + * + * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*)) + * + * Empties the slab, similar to clear_indegree(), but in addition it + * calls the given 'free_fn' for each slab entry to release any + * additional memory that might be owned by the entry (but not the + * entry itself!). + * Note that 'free_fn' might be called even for entries for which no + * indegree_at() call has been made; in this case 'free_fn' is invoked + * with a pointer to a zero-initialized location. */ #define define_commit_slab(slabname, elemtype) \ @@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths( struct combine_diff_path *p, const struct object_id *oid, const struct object_id **parents_oid, int nparent, struct strbuf *base, struct diff_options *opt); -int diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - const char *base, struct diff_options *opt); -int diff_root_tree_oid(const struct object_id *new_oid, const char *base, - struct diff_options *opt); +void diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + const char *base, struct diff_options *opt); +void diff_root_tree_oid(const struct object_id *new_oid, const char *base, + struct diff_options *opt); struct combine_diff_path { struct combine_diff_path *next; diff --git a/revision.c b/revision.c index 6aa7f4f567..049008c728 100644 --- a/revision.c +++ b/revision.c @@ -791,9 +791,7 @@ static int rev_compare_tree(struct rev_info *revs, tree_difference = REV_TREE_SAME; revs->pruning.flags.has_changes = 0; - if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "", - &revs->pruning) < 0) - return REV_TREE_DIFFERENT; + diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning); if (!nth_parent) if (bloom_ret == 1 && tree_difference == REV_TREE_SAME) @@ -804,7 +802,6 @@ static int rev_compare_tree(struct rev_info *revs, static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) { - int retval; struct tree *t1 = get_commit_tree(commit); if (!t1) @@ -812,9 +809,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) tree_difference = REV_TREE_SAME; revs->pruning.flags.has_changes = 0; - retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); + diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); - return retval >= 0 && (tree_difference == REV_TREE_SAME); + return tree_difference == REV_TREE_SAME; } struct treesame_state { @@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk) * supports a "valid" flag. */ define_commit_slab(commit_depth, int *); +static void free_depth_in_slab(int **ptr) +{ + FREE_AND_NULL(*ptr); +} struct commit_list *get_shallow_commits(struct object_array *heads, int depth, int shallow_flag, int not_shallow_flag) { @@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth, } } } - for (i = 0; i < depths.slab_count; i++) { - int j; - - if (!depths.slab[i]) - continue; - for (j = 0; j < depths.slab_size; j++) - free(depths.slab[i][j]); - } - clear_commit_depth(&depths); + deep_clear_commit_depth(&depths, free_depth_in_slab); return result; } diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh index e5ca359edf..65cc703c65 100755 --- a/t/t4010-diff-pathspec.sh +++ b/t/t4010-diff-pathspec.sh @@ -125,7 +125,9 @@ test_expect_success 'setup submodules' ' test_expect_success 'diff-tree ignores trailing slash on submodule path' ' git diff --name-only HEAD^ HEAD submod >expect && git diff --name-only HEAD^ HEAD submod/ >actual && - test_cmp expect actual + test_cmp expect actual && + git diff --name-only HEAD^ HEAD -- submod/whatever >actual && + test_must_be_empty actual ' test_expect_success 'diff multiple wildcard pathspecs' ' diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 26f332d6a3..50d5f0849f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' ' ' test_expect_success 'detect low chunk count' ' - corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ + corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \ "missing the .* chunk" ' @@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' ' test_expect_success 'detect incorrect chunk count' ' corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \ - "chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET + "commit-graph file is too small to hold [0-9]* chunks" \ + $GRAPH_CHUNK_LOOKUP_OFFSET ' test_expect_success 'git fsck (checks commit-graph)' ' diff --git a/tree-diff.c b/tree-diff.c index f3d303c6e5..6ebad1a46f 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -29,9 +29,9 @@ static struct combine_diff_path *ll_diff_tree_paths( struct combine_diff_path *p, const struct object_id *oid, const struct object_id **parents_oid, int nparent, struct strbuf *base, struct diff_options *opt); -static int ll_diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - struct strbuf *base, struct diff_options *opt); +static void ll_diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + struct strbuf *base, struct diff_options *opt); /* * Compare two tree entries, taking into account only path/S_ISDIR(mode), @@ -679,9 +679,9 @@ static void try_to_follow_renames(const struct object_id *old_oid, q->nr = 1; } -static int ll_diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - struct strbuf *base, struct diff_options *opt) +static void ll_diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + struct strbuf *base, struct diff_options *opt) { struct combine_diff_path phead, *p; pathchange_fn_t pathchange_old = opt->pathchange; @@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid, } opt->pathchange = pathchange_old; - return 0; } -int diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - const char *base_str, struct diff_options *opt) +void diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + const char *base_str, struct diff_options *opt) { struct strbuf base; - int retval; strbuf_init(&base, PATH_MAX); strbuf_addstr(&base, base_str); - retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt); + ll_diff_tree_oid(old_oid, new_oid, &base, opt); if (!*base_str && opt->flags.follow_renames && diff_might_be_rename()) try_to_follow_renames(old_oid, new_oid, &base, opt); strbuf_release(&base); - - return retval; } -int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt) +void diff_root_tree_oid(const struct object_id *new_oid, + const char *base, + struct diff_options *opt) { - return diff_tree_oid(NULL, new_oid, base, opt); + diff_tree_oid(NULL, new_oid, base, opt); } diff --git a/tree-walk.c b/tree-walk.c index bb0ad34c54..0160294712 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item, if (matchlen > pathlen) { if (match[pathlen] != '/') return 0; - if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode)) + /* + * Reject non-directories as partial pathnames, except + * when match is a submodule with a trailing slash and + * nothing else (to handle 'submod/' and 'submod' + * uniformly). + */ + if (!S_ISDIR(entry->mode) && + (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1)) return 0; } |