diff options
Diffstat (limited to 'commit-graph.c')
-rw-r--r-- | commit-graph.c | 678 |
1 files changed, 411 insertions, 267 deletions
diff --git a/commit-graph.c b/commit-graph.c index 031641014f..2bcb4e0f89 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -7,7 +7,7 @@ #include "object.h" #include "refs.h" #include "revision.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "commit-graph.h" #include "object-store.h" #include "alloc.h" @@ -19,6 +19,7 @@ #include "shallow.h" #include "json-writer.h" #include "trace2.h" +#include "chunk-format.h" void git_test_write_commit_graph_or_die(void) { @@ -38,11 +39,12 @@ void git_test_write_commit_graph_or_die(void) #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */ +#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */ +#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */ #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */ #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */ #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */ #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */ -#define MAX_NUM_CHUNKS 7 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) @@ -57,13 +59,16 @@ void git_test_write_commit_graph_or_die(void) #define GRAPH_HEADER_SIZE 8 #define GRAPH_FANOUT_SIZE (4 * 256) -#define GRAPH_CHUNKLOOKUP_WIDTH 12 -#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \ +#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * CHUNK_TOC_ENTRY_SIZE \ + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) +#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31) + /* Remember to update object flag allocation in object.h */ #define REACHABLE (1u<<15) +define_commit_slab(topo_level_slab, uint32_t); + /* Keep track of the order in which commits are added to our list. */ define_commit_slab(commit_pos, int); static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos); @@ -91,6 +96,13 @@ define_commit_slab(commit_graph_data_slab, struct commit_graph_data); static struct commit_graph_data_slab commit_graph_data_slab = COMMIT_SLAB_INIT(1, commit_graph_data_slab); +static int get_configured_generation_version(struct repository *r) +{ + int version = 2; + repo_config_get_int(r, "commitgraph.generationversion", &version); + return version; +} + uint32_t commit_graph_position(const struct commit *c) { struct commit_graph_data *data = @@ -99,7 +111,7 @@ uint32_t commit_graph_position(const struct commit *c) return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH; } -uint32_t commit_graph_generation(const struct commit *c) +timestamp_t commit_graph_generation(const struct commit *c) { struct commit_graph_data *data = commit_graph_data_slab_peek(&commit_graph_data_slab, c); @@ -139,13 +151,17 @@ static struct commit_graph_data *commit_graph_data_at(const struct commit *c) return data; } +/* + * Should be used only while writing commit-graph as it compares + * generation value of commits by directly accessing commit-slab. + */ static int commit_gen_cmp(const void *va, const void *vb) { const struct commit *a = *(const struct commit **)va; const struct commit *b = *(const struct commit **)vb; - uint32_t generation_a = commit_graph_generation(a); - uint32_t generation_b = commit_graph_generation(b); + const timestamp_t generation_a = commit_graph_data_at(a)->generation; + const timestamp_t generation_b = commit_graph_data_at(b)->generation; /* lower generation commits first */ if (generation_a < generation_b) return -1; @@ -288,15 +304,43 @@ static int verify_commit_graph_lite(struct commit_graph *g) return 0; } +static int graph_read_oid_lookup(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + g->chunk_oid_lookup = chunk_start; + g->num_commits = chunk_size / g->hash_len; + return 0; +} + +static int graph_read_bloom_data(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + uint32_t hash_version; + g->chunk_bloom_data = chunk_start; + hash_version = get_be32(chunk_start); + + if (hash_version != 1) + return 0; + + g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings)); + g->bloom_filter_settings->hash_version = hash_version; + g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4); + g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8); + g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES; + + return 0; +} + struct commit_graph *parse_commit_graph(struct repository *r, void *graph_map, size_t graph_size) { - const unsigned char *data, *chunk_lookup; - uint32_t i; + const unsigned char *data; struct commit_graph *graph; - uint64_t next_chunk_offset; uint32_t graph_signature; unsigned char graph_version, hash_version; + struct chunkfile *cf = NULL; if (!graph_map) return NULL; @@ -337,7 +381,7 @@ struct commit_graph *parse_commit_graph(struct repository *r, graph->data_len = graph_size; if (graph_size < GRAPH_HEADER_SIZE + - (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH + + (graph->num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) { error(_("commit-graph file is too small to hold %u chunks"), graph->num_chunks); @@ -345,94 +389,31 @@ struct commit_graph *parse_commit_graph(struct repository *r, return NULL; } - chunk_lookup = data + 8; - next_chunk_offset = get_be64(chunk_lookup + 4); - for (i = 0; i < graph->num_chunks; i++) { - uint32_t chunk_id; - uint64_t chunk_offset = next_chunk_offset; - int chunk_repeated = 0; - - chunk_id = get_be32(chunk_lookup + 0); - - chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; - next_chunk_offset = get_be64(chunk_lookup + 4); + cf = init_chunkfile(NULL); - if (chunk_offset > graph_size - the_hash_algo->rawsz) { - error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), - (uint32_t)chunk_offset); - goto free_and_return; - } - - switch (chunk_id) { - case GRAPH_CHUNKID_OIDFANOUT: - if (graph->chunk_oid_fanout) - chunk_repeated = 1; - else - graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset); - break; - - case GRAPH_CHUNKID_OIDLOOKUP: - if (graph->chunk_oid_lookup) - chunk_repeated = 1; - else { - graph->chunk_oid_lookup = data + chunk_offset; - graph->num_commits = (next_chunk_offset - chunk_offset) - / graph->hash_len; - } - break; - - case GRAPH_CHUNKID_DATA: - if (graph->chunk_commit_data) - chunk_repeated = 1; - else - graph->chunk_commit_data = data + chunk_offset; - break; - - case GRAPH_CHUNKID_EXTRAEDGES: - if (graph->chunk_extra_edges) - chunk_repeated = 1; - else - graph->chunk_extra_edges = data + chunk_offset; - break; - - case GRAPH_CHUNKID_BASE: - if (graph->chunk_base_graphs) - chunk_repeated = 1; - else - graph->chunk_base_graphs = data + chunk_offset; - break; - - case GRAPH_CHUNKID_BLOOMINDEXES: - if (graph->chunk_bloom_indexes) - chunk_repeated = 1; - else if (r->settings.commit_graph_read_changed_paths) - graph->chunk_bloom_indexes = data + chunk_offset; - break; - - case GRAPH_CHUNKID_BLOOMDATA: - if (graph->chunk_bloom_data) - chunk_repeated = 1; - else if (r->settings.commit_graph_read_changed_paths) { - uint32_t hash_version; - graph->chunk_bloom_data = data + chunk_offset; - hash_version = get_be32(data + chunk_offset); + if (read_table_of_contents(cf, graph->data, graph_size, + GRAPH_HEADER_SIZE, graph->num_chunks)) + goto free_and_return; - if (hash_version != 1) - break; + pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, + (const unsigned char **)&graph->chunk_oid_fanout); + read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph); + pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data); + pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); + pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); - graph->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings)); - graph->bloom_filter_settings->hash_version = hash_version; - graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4); - graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8); - graph->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES; - } - break; - } + if (get_configured_generation_version(r) >= 2) { + pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, + &graph->chunk_generation_data); + pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, + &graph->chunk_generation_data_overflow); + } - if (chunk_repeated) { - error(_("commit-graph chunk id %08x appears multiple times"), chunk_id); - goto free_and_return; - } + if (r->settings.commit_graph_read_changed_paths) { + pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, + &graph->chunk_bloom_indexes); + read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, + graph_read_bloom_data, graph); } if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { @@ -444,14 +425,16 @@ struct commit_graph *parse_commit_graph(struct repository *r, FREE_AND_NULL(graph->bloom_filter_settings); } - hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len); + oidread(&graph->oid, graph->data + graph->data_len - graph->hash_len); if (verify_commit_graph_lite(graph)) goto free_and_return; + free_chunkfile(cf); return graph; free_and_return: + free_chunkfile(cf); free(graph->bloom_filter_settings); free(graph); return NULL; @@ -543,7 +526,7 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, return NULL; count = st.st_size / (the_hash_algo->hexsz + 1); - oids = xcalloc(count, sizeof(struct object_id)); + CALLOC_ARRAY(oids, count); prepare_alt_odb(r); @@ -590,6 +573,31 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, return graph_chain; } +/* + * returns 1 if and only if all graphs in the chain have + * corrected commit dates stored in the generation_data chunk. + */ +static int validate_mixed_generation_chain(struct commit_graph *g) +{ + int read_generation_data = 1; + struct commit_graph *p = g; + + while (read_generation_data && p) { + read_generation_data = p->read_generation_data; + p = p->base_graph; + } + + if (read_generation_data) + return 1; + + while (g) { + g->read_generation_data = 0; + g = g->base_graph; + } + + return 0; +} + struct commit_graph *read_commit_graph_one(struct repository *r, struct object_directory *odb) { @@ -598,6 +606,8 @@ struct commit_graph *read_commit_graph_one(struct repository *r, if (!g) g = load_commit_graph_chain(r, odb); + validate_mixed_generation_chain(g); + return g; } @@ -673,6 +683,20 @@ int generation_numbers_enabled(struct repository *r) return !!first_generation; } +int corrected_commit_dates_enabled(struct repository *r) +{ + struct commit_graph *g; + if (!prepare_commit_graph(r)) + return 0; + + g = r->objects->commit_graph; + + if (!g->num_commits) + return 0; + + return g->read_generation_data; +} + struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r) { struct commit_graph *g = r->objects->commit_graph; @@ -722,7 +746,7 @@ static void load_oid_from_graph(struct commit_graph *g, lex_index = pos - g->num_commits_in_base; - hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index); + oidread(oid, g->chunk_oid_lookup + g->hash_len * lex_index); } static struct commit_list **insert_parent_or_die(struct repository *r, @@ -748,17 +772,41 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, { const unsigned char *commit_data; struct commit_graph_data *graph_data; - uint32_t lex_index; + uint32_t lex_index, offset_pos; + uint64_t date_high, date_low, offset; while (pos < g->num_commits_in_base) g = g->base_graph; + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + lex_index = pos - g->num_commits_in_base; commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; graph_data = commit_graph_data_at(item); graph_data->graph_pos = pos; - graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + + date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; + date_low = get_be32(commit_data + g->hash_len + 12); + item->date = (timestamp_t)((date_high << 32) | date_low); + + if (g->read_generation_data) { + offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index); + + if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) { + if (!g->chunk_generation_data_overflow) + die(_("commit-graph requires overflow generation data but has none")); + + offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW; + graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos); + } else + graph_data->generation = item->date + offset; + } else + graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + + if (g->topo_levels) + *topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2; } static inline void set_commit_tree(struct commit *c, struct tree *t) @@ -772,38 +820,22 @@ static int fill_commit_in_graph(struct repository *r, { uint32_t edge_value; uint32_t *parent_data_ptr; - uint64_t date_low, date_high; struct commit_list **pptr; - struct commit_graph_data *graph_data; const unsigned char *commit_data; uint32_t lex_index; while (pos < g->num_commits_in_base) g = g->base_graph; - if (pos >= g->num_commits + g->num_commits_in_base) - die(_("invalid commit position. commit-graph is likely corrupt")); + fill_commit_graph_info(item, g, pos); - /* - * Store the "full" position, but then use the - * "local" position for the rest of the calculation. - */ - graph_data = commit_graph_data_at(item); - graph_data->graph_pos = pos; lex_index = pos - g->num_commits_in_base; - commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index; item->object.parsed = 1; set_commit_tree(item, NULL); - date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; - date_low = get_be32(commit_data + g->hash_len + 12); - item->date = (timestamp_t)((date_high << 32) | date_low); - - graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; - pptr = &item->parents; edge_value = get_be32(commit_data + g->hash_len); @@ -907,7 +939,7 @@ static struct tree *load_tree_for_commit(struct repository *r, commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * (graph_pos - g->num_commits_in_base); - hashcpy(oid.hash, commit_data); + oidread(&oid, commit_data); set_commit_tree(c, lookup_tree(r, &oid)); return c->maybe_tree; @@ -943,6 +975,7 @@ struct write_commit_graph_context { struct oid_array oids; struct packed_commit_list commits; int num_extra_edges; + int num_generation_data_overflows; unsigned long approx_nr_objects; struct progress *progress; int progress_done; @@ -961,8 +994,11 @@ struct write_commit_graph_context { report_progress:1, split:1, changed_paths:1, - order_by_pack:1; + order_by_pack:1, + write_generation_data:1, + trust_generation_numbers:1; + struct topo_level_slab *topo_levels; const struct commit_graph_opts *opts; size_t total_bloom_filter_data_size; const struct bloom_filter_settings *bloom_settings; @@ -974,8 +1010,9 @@ struct write_commit_graph_context { }; static int write_graph_chunk_fanout(struct hashfile *f, - struct write_commit_graph_context *ctx) + void *data) { + struct write_commit_graph_context *ctx = data; int i, count = 0; struct commit **list = ctx->commits.list; @@ -1000,8 +1037,9 @@ static int write_graph_chunk_fanout(struct hashfile *f, } static int write_graph_chunk_oids(struct hashfile *f, - struct write_commit_graph_context *ctx) + void *data) { + struct write_commit_graph_context *ctx = data; struct commit **list = ctx->commits.list; int count; for (count = 0; count < ctx->commits.nr; count++, list++) { @@ -1012,15 +1050,16 @@ static int write_graph_chunk_oids(struct hashfile *f, return 0; } -static const unsigned char *commit_to_sha1(size_t index, void *table) +static const struct object_id *commit_to_oid(size_t index, const void *table) { - struct commit **commits = table; - return commits[index]->object.oid.hash; + const struct commit * const *commits = table; + return &commits[index]->object.oid; } static int write_graph_chunk_data(struct hashfile *f, - struct write_commit_graph_context *ctx) + void *data) { + struct write_commit_graph_context *ctx = data; struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; uint32_t num_extra_edges = 0; @@ -1032,7 +1071,7 @@ static int write_graph_chunk_data(struct hashfile *f, uint32_t packedDate[2]; display_progress(ctx->progress, ++ctx->progress_cnt); - if (parse_commit_no_graph(*list)) + if (repo_parse_commit_no_graph(ctx->r, *list)) die(_("unable to parse commit %s"), oid_to_hex(&(*list)->object.oid)); tree = get_commit_tree_oid(*list); @@ -1043,10 +1082,10 @@ static int write_graph_chunk_data(struct hashfile *f, if (!parent) edge_value = GRAPH_PARENT_NONE; else { - edge_value = sha1_pos(parent->item->object.oid.hash, - ctx->commits.list, - ctx->commits.nr, - commit_to_sha1); + edge_value = oid_pos(&parent->item->object.oid, + ctx->commits.list, + ctx->commits.nr, + commit_to_oid); if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; @@ -1074,10 +1113,10 @@ static int write_graph_chunk_data(struct hashfile *f, else if (parent->next) edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges; else { - edge_value = sha1_pos(parent->item->object.oid.hash, - ctx->commits.list, - ctx->commits.nr, - commit_to_sha1); + edge_value = oid_pos(&parent->item->object.oid, + ctx->commits.list, + ctx->commits.nr, + commit_to_oid); if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; @@ -1109,7 +1148,7 @@ static int write_graph_chunk_data(struct hashfile *f, else packedDate[0] = 0; - packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2); + packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2); packedDate[1] = htonl((*list)->date); hashwrite(f, packedDate, 8); @@ -1120,9 +1159,53 @@ static int write_graph_chunk_data(struct hashfile *f, return 0; } +static int write_graph_chunk_generation_data(struct hashfile *f, + void *data) +{ + struct write_commit_graph_context *ctx = data; + int i, num_generation_data_overflows = 0; + + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + timestamp_t offset; + repo_parse_commit(ctx->r, c); + offset = commit_graph_data_at(c)->generation - c->date; + display_progress(ctx->progress, ++ctx->progress_cnt); + + if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { + offset = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | num_generation_data_overflows; + num_generation_data_overflows++; + } + + hashwrite_be32(f, offset); + } + + return 0; +} + +static int write_graph_chunk_generation_data_overflow(struct hashfile *f, + void *data) +{ + struct write_commit_graph_context *ctx = data; + int i; + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + timestamp_t offset = commit_graph_data_at(c)->generation - c->date; + display_progress(ctx->progress, ++ctx->progress_cnt); + + if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { + hashwrite_be32(f, offset >> 32); + hashwrite_be32(f, (uint32_t) offset); + } + } + + return 0; +} + static int write_graph_chunk_extra_edges(struct hashfile *f, - struct write_commit_graph_context *ctx) + void *data) { + struct write_commit_graph_context *ctx = data; struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; struct commit_list *parent; @@ -1143,10 +1226,10 @@ static int write_graph_chunk_extra_edges(struct hashfile *f, /* Since num_parents > 2, this initializer is safe. */ for (parent = (*list)->parents->next; parent; parent = parent->next) { - int edge_value = sha1_pos(parent->item->object.oid.hash, - ctx->commits.list, - ctx->commits.nr, - commit_to_sha1); + int edge_value = oid_pos(&parent->item->object.oid, + ctx->commits.list, + ctx->commits.nr, + commit_to_oid); if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; @@ -1175,8 +1258,9 @@ static int write_graph_chunk_extra_edges(struct hashfile *f, } static int write_graph_chunk_bloom_indexes(struct hashfile *f, - struct write_commit_graph_context *ctx) + void *data) { + struct write_commit_graph_context *ctx = data; struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; uint32_t cur_pos = 0; @@ -1210,8 +1294,9 @@ static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx) } static int write_graph_chunk_bloom_data(struct hashfile *f, - struct write_commit_graph_context *ctx) + void *data) { + struct write_commit_graph_context *ctx = data; struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1306,11 +1391,11 @@ static void close_reachable(struct write_commit_graph_context *ctx) if (!commit) continue; if (ctx->split) { - if ((!parse_commit(commit) && + if ((!repo_parse_commit(ctx->r, commit) && commit_graph_position(commit) == COMMIT_NOT_FROM_GRAPH) || flags == COMMIT_GRAPH_SPLIT_REPLACE) add_missing_parents(ctx, commit); - } else if (!parse_commit_no_graph(commit)) + } else if (!repo_parse_commit_no_graph(ctx->r, commit)) add_missing_parents(ctx, commit); } stop_progress(&ctx->progress); @@ -1329,6 +1414,59 @@ static void close_reachable(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static void compute_topological_levels(struct write_commit_graph_context *ctx) +{ + int i; + struct commit_list *list = NULL; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Computing commit graph topological levels"), + ctx->commits.nr); + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + uint32_t level; + + repo_parse_commit(ctx->r, c); + level = *topo_level_slab_at(ctx->topo_levels, c); + + display_progress(ctx->progress, i + 1); + if (level != GENERATION_NUMBER_ZERO) + continue; + + commit_list_insert(c, &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + uint32_t max_level = 0; + + for (parent = current->parents; parent; parent = parent->next) { + repo_parse_commit(ctx->r, parent->item); + level = *topo_level_slab_at(ctx->topo_levels, parent->item); + + if (level == GENERATION_NUMBER_ZERO) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } + + if (level > max_level) + max_level = level; + } + + if (all_parents_computed) { + pop_commit(&list); + + if (max_level > GENERATION_NUMBER_V1_MAX - 1) + max_level = GENERATION_NUMBER_V1_MAX - 1; + *topo_level_slab_at(ctx->topo_levels, current) = max_level + 1; + } + } + } + stop_progress(&ctx->progress); +} + static void compute_generation_numbers(struct write_commit_graph_context *ctx) { int i; @@ -1338,42 +1476,56 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) ctx->progress = start_delayed_progress( _("Computing commit graph generation numbers"), ctx->commits.nr); + + if (!ctx->trust_generation_numbers) { + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + repo_parse_commit(ctx->r, c); + commit_graph_data_at(c)->generation = GENERATION_NUMBER_ZERO; + } + } + for (i = 0; i < ctx->commits.nr; i++) { - uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation; + struct commit *c = ctx->commits.list[i]; + timestamp_t corrected_commit_date; + + repo_parse_commit(ctx->r, c); + corrected_commit_date = commit_graph_data_at(c)->generation; display_progress(ctx->progress, i + 1); - if (generation != GENERATION_NUMBER_INFINITY && - generation != GENERATION_NUMBER_ZERO) + if (corrected_commit_date != GENERATION_NUMBER_ZERO) continue; - commit_list_insert(ctx->commits.list[i], &list); + commit_list_insert(c, &list); while (list) { struct commit *current = list->item; struct commit_list *parent; int all_parents_computed = 1; - uint32_t max_generation = 0; + timestamp_t max_corrected_commit_date = 0; for (parent = current->parents; parent; parent = parent->next) { - generation = commit_graph_data_at(parent->item)->generation; + repo_parse_commit(ctx->r, parent->item); + corrected_commit_date = commit_graph_data_at(parent->item)->generation; - if (generation == GENERATION_NUMBER_INFINITY || - generation == GENERATION_NUMBER_ZERO) { + if (corrected_commit_date == GENERATION_NUMBER_ZERO) { all_parents_computed = 0; commit_list_insert(parent->item, &list); break; - } else if (generation > max_generation) { - max_generation = generation; } + + if (corrected_commit_date > max_corrected_commit_date) + max_corrected_commit_date = corrected_commit_date; } if (all_parents_computed) { - struct commit_graph_data *data = commit_graph_data_at(current); - - data->generation = max_generation + 1; pop_commit(&list); - if (data->generation > GENERATION_NUMBER_MAX) - data->generation = GENERATION_NUMBER_MAX; + if (current->date && current->date > max_corrected_commit_date) + max_corrected_commit_date = current->date - 1; + commit_graph_data_at(current)->generation = max_corrected_commit_date + 1; + + if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX) + ctx->num_generation_data_overflows++; } } } @@ -1458,7 +1610,7 @@ static int add_ref_to_set(const char *refname, struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - if (!peel_ref(refname, &peeled)) + if (!peel_iterated_oid(oid, &peeled)) oid = &peeled; if (oid_object_info(the_repository, oid, NULL) == OBJ_COMMIT) oidset_insert(data->commits, oid); @@ -1593,9 +1745,9 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) continue; if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE) - parse_commit(ctx->commits.list[ctx->commits.nr]); + repo_parse_commit(ctx->r, ctx->commits.list[ctx->commits.nr]); else - parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); + repo_parse_commit_no_graph(ctx->r, ctx->commits.list[ctx->commits.nr]); num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents); if (num_parents > 2) @@ -1620,8 +1772,9 @@ static int write_graph_chunk_base_1(struct hashfile *f, } static int write_graph_chunk_base(struct hashfile *f, - struct write_commit_graph_context *ctx) + void *data) { + struct write_commit_graph_context *ctx = data; int num = write_graph_chunk_base_1(f, ctx->new_base_graph); if (num != ctx->num_commit_graphs_after - 1) { @@ -1632,27 +1785,16 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } -typedef int (*chunk_write_fn)(struct hashfile *f, - struct write_commit_graph_context *ctx); - -struct chunk_info { - uint32_t id; - uint64_t size; - chunk_write_fn write_fn; -}; - static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; int fd; struct hashfile *f; struct lock_file lk = LOCK_INIT; - struct chunk_info chunks[MAX_NUM_CHUNKS + 1]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; - int num_chunks = 3; - uint64_t chunk_offset; - struct object_id file_hash; + struct chunkfile *cf; + unsigned char file_hash[GIT_MAX_RAWSZ]; if (ctx->split) { struct strbuf tmp_file = STRBUF_INIT; @@ -1698,83 +1840,60 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) f = hashfd(fd, get_lock_file_path(&lk)); } - chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; - chunks[0].size = GRAPH_FANOUT_SIZE; - chunks[0].write_fn = write_graph_chunk_fanout; - chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; - chunks[1].size = hashsz * ctx->commits.nr; - chunks[1].write_fn = write_graph_chunk_oids; - chunks[2].id = GRAPH_CHUNKID_DATA; - chunks[2].size = (hashsz + 16) * ctx->commits.nr; - chunks[2].write_fn = write_graph_chunk_data; - if (ctx->num_extra_edges) { - chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; - chunks[num_chunks].size = 4 * ctx->num_extra_edges; - chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; - num_chunks++; - } + cf = init_chunkfile(f); + + add_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, GRAPH_FANOUT_SIZE, + write_graph_chunk_fanout); + add_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, hashsz * ctx->commits.nr, + write_graph_chunk_oids); + add_chunk(cf, GRAPH_CHUNKID_DATA, (hashsz + 16) * ctx->commits.nr, + write_graph_chunk_data); + + if (ctx->write_generation_data) + add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, + sizeof(uint32_t) * ctx->commits.nr, + write_graph_chunk_generation_data); + if (ctx->num_generation_data_overflows) + add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, + sizeof(timestamp_t) * ctx->num_generation_data_overflows, + write_graph_chunk_generation_data_overflow); + if (ctx->num_extra_edges) + add_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, + 4 * ctx->num_extra_edges, + write_graph_chunk_extra_edges); if (ctx->changed_paths) { - chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; - chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; - chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; - num_chunks++; - chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; - chunks[num_chunks].size = sizeof(uint32_t) * 3 - + ctx->total_bloom_filter_data_size; - chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; - num_chunks++; - } - if (ctx->num_commit_graphs_after > 1) { - chunks[num_chunks].id = GRAPH_CHUNKID_BASE; - chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); - chunks[num_chunks].write_fn = write_graph_chunk_base; - num_chunks++; - } - - chunks[num_chunks].id = 0; - chunks[num_chunks].size = 0; + add_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, + sizeof(uint32_t) * ctx->commits.nr, + write_graph_chunk_bloom_indexes); + add_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, + sizeof(uint32_t) * 3 + + ctx->total_bloom_filter_data_size, + write_graph_chunk_bloom_data); + } + if (ctx->num_commit_graphs_after > 1) + add_chunk(cf, GRAPH_CHUNKID_BASE, + hashsz * (ctx->num_commit_graphs_after - 1), + write_graph_chunk_base); hashwrite_be32(f, GRAPH_SIGNATURE); hashwrite_u8(f, GRAPH_VERSION); hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); + hashwrite_u8(f, get_num_chunks(cf)); hashwrite_u8(f, ctx->num_commit_graphs_after - 1); - chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; - for (i = 0; i <= num_chunks; i++) { - uint32_t chunk_write[3]; - - chunk_write[0] = htonl(chunks[i].id); - chunk_write[1] = htonl(chunk_offset >> 32); - chunk_write[2] = htonl(chunk_offset & 0xffffffff); - hashwrite(f, chunk_write, 12); - - chunk_offset += chunks[i].size; - } - if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Writing out commit graph in %d pass", "Writing out commit graph in %d passes", - num_chunks), - num_chunks); + get_num_chunks(cf)), + get_num_chunks(cf)); ctx->progress = start_delayed_progress( progress_title.buf, - num_chunks * ctx->commits.nr); + get_num_chunks(cf) * ctx->commits.nr); } - for (i = 0; i < num_chunks; i++) { - uint64_t start_offset = f->total + f->offset; - - if (chunks[i].write_fn(f, ctx)) - return -1; - - if (f->total + f->offset != start_offset + chunks[i].size) - BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead", - chunks[i].size, chunks[i].id, - f->total + f->offset - start_offset); - } + write_chunkfile(cf, ctx); stop_progress(&ctx->progress); strbuf_release(&progress_title); @@ -1790,7 +1909,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } close_commit_graph(ctx->r->objects); - finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + free_chunkfile(cf); if (ctx->split) { FILE *chainf = fdopen_lock_file(&lk, "w"); @@ -1825,7 +1945,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) unlink(graph_name); } - ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash)); + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(hash_to_hex(file_hash)); final_graph_name = get_split_graph_filename(ctx->odb, ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]); ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name; @@ -1918,6 +2038,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) if (i < ctx->num_commit_graphs_after) ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + /* + * If the topmost remaining layer has generation data chunk, the + * resultant layer also has generation data chunk. + */ + if (i == ctx->num_commit_graphs_after - 2) + ctx->write_generation_data = !!g->chunk_generation_data; + i--; g = g->base_graph; } @@ -2104,28 +2231,32 @@ int write_commit_graph(struct object_directory *odb, enum commit_graph_write_flags flags, const struct commit_graph_opts *opts) { + struct repository *r = the_repository; struct write_commit_graph_context *ctx; uint32_t i; int res = 0; int replace = 0; struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; + struct topo_level_slab topo_levels; - prepare_repo_settings(the_repository); - if (!the_repository->settings.core_commit_graph) { + prepare_repo_settings(r); + if (!r->settings.core_commit_graph) { warning(_("attempting to write a commit-graph, but 'core.commitGraph' is disabled")); return 0; } - if (!commit_graph_compatible(the_repository)) + if (!commit_graph_compatible(r)) return 0; - ctx = xcalloc(1, sizeof(struct write_commit_graph_context)); - ctx->r = the_repository; + CALLOC_ARRAY(ctx, 1); + ctx->r = r; ctx->odb = odb; ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; ctx->opts = opts; ctx->total_bloom_filter_data_size = 0; + ctx->write_generation_data = (get_configured_generation_version(r) == 2); + ctx->num_generation_data_overflows = 0; bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", bloom_settings.bits_per_entry); @@ -2135,11 +2266,23 @@ int write_commit_graph(struct object_directory *odb, bloom_settings.max_changed_paths); ctx->bloom_settings = &bloom_settings; + init_topo_level_slab(&topo_levels); + ctx->topo_levels = &topo_levels; + + prepare_commit_graph(ctx->r); + if (ctx->r->objects->commit_graph) { + struct commit_graph *g = ctx->r->objects->commit_graph; + + while (g) { + g->topo_levels = &topo_levels; + g = g->base_graph; + } + } + if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) ctx->changed_paths = 1; if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { struct commit_graph *g; - prepare_commit_graph_one(ctx->r, ctx->odb); g = ctx->r->objects->commit_graph; @@ -2151,10 +2294,7 @@ int write_commit_graph(struct object_directory *odb, } if (ctx->split) { - struct commit_graph *g; - prepare_commit_graph(ctx->r); - - g = ctx->r->objects->commit_graph; + struct commit_graph *g = ctx->r->objects->commit_graph; while (g) { ctx->num_commit_graphs_before++; @@ -2178,14 +2318,11 @@ int write_commit_graph(struct object_directory *odb, ctx->approx_nr_objects = approximate_object_count(); - if (ctx->append) - prepare_commit_graph_one(ctx->r, ctx->odb); - if (ctx->append && ctx->r->objects->commit_graph) { struct commit_graph *g = ctx->r->objects->commit_graph; for (i = 0; i < g->num_commits; i++) { struct object_id oid; - hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * i); + oidread(&oid, g->chunk_oid_lookup + g->hash_len * i); oid_array_append(&ctx->oids, &oid); } } @@ -2227,7 +2364,11 @@ int write_commit_graph(struct object_directory *odb, } else ctx->num_commit_graphs_after = 1; - compute_generation_numbers(ctx); + ctx->trust_generation_numbers = validate_mixed_generation_chain(ctx->r->objects->commit_graph); + + compute_topological_levels(ctx); + if (ctx->write_generation_data) + compute_generation_numbers(ctx); if (ctx->changed_paths) compute_bloom_filters(ctx); @@ -2243,6 +2384,7 @@ cleanup: free(ctx->graph_name); free(ctx->commits.list); oid_array_clear(&ctx->oids); + clear_topo_level_slab(&topo_levels); if (ctx->commit_graph_filenames_after) { for (i = 0; i < ctx->num_commit_graphs_after; i++) { @@ -2283,7 +2425,8 @@ static void graph_report(const char *fmt, ...) int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) { uint32_t i, cur_fanout_pos = 0; - struct object_id prev_oid, cur_oid, checksum; + struct object_id prev_oid, cur_oid; + unsigned char checksum[GIT_MAX_HEXSZ]; int generation_zero = 0; struct hashfile *f; int devnull; @@ -2302,8 +2445,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) devnull = open("/dev/null", O_WRONLY); f = hashfd(devnull, NULL); hashwrite(f, g->data, g->data_len - g->hash_len); - finalize_hashfile(f, checksum.hash, CSUM_CLOSE); - if (!hasheq(checksum.hash, g->data + g->data_len - g->hash_len)) { + finalize_hashfile(f, checksum, CSUM_CLOSE); + if (!hasheq(checksum, g->data + g->data_len - g->hash_len)) { graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt")); verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH; } @@ -2311,7 +2454,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit; - hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); + oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i); if (i && oidcmp(&prev_oid, &cur_oid) >= 0) graph_report(_("commit-graph has incorrect OID order: %s then %s"), @@ -2355,11 +2498,11 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit, *odb_commit; struct commit_list *graph_parents, *odb_parents; - uint32_t max_generation = 0; - uint32_t generation; + timestamp_t max_generation = 0; + timestamp_t generation; display_progress(progress, i + 1); - hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); + oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i); graph_commit = lookup_commit(r, &cur_oid); odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r)); @@ -2420,16 +2563,17 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) continue; /* - * If one of our parents has generation GENERATION_NUMBER_MAX, then - * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid - * extra logic in the following condition. + * If we are using topological level and one of our parents has + * generation GENERATION_NUMBER_V1_MAX, then our generation is + * also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic + * in the following condition. */ - if (max_generation == GENERATION_NUMBER_MAX) + if (!g->read_generation_data && max_generation == GENERATION_NUMBER_V1_MAX) max_generation--; generation = commit_graph_generation(graph_commit); - if (generation != max_generation + 1) - graph_report(_("commit-graph generation for commit %s is %u != %u"), + if (generation < max_generation + 1) + graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime), oid_to_hex(&cur_oid), generation, max_generation + 1); |