diff options
Diffstat (limited to 'commit-graph.c')
-rw-r--r-- | commit-graph.c | 359 |
1 files changed, 294 insertions, 65 deletions
diff --git a/commit-graph.c b/commit-graph.c index 6541060271..ed31843fa5 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -38,11 +38,13 @@ void git_test_write_commit_graph_or_die(void) #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */ +#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */ +#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */ #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */ #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */ #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */ #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */ -#define MAX_NUM_CHUNKS 7 +#define MAX_NUM_CHUNKS 9 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) @@ -61,9 +63,13 @@ void git_test_write_commit_graph_or_die(void) #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \ + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) +#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31) + /* Remember to update object flag allocation in object.h */ #define REACHABLE (1u<<15) +define_commit_slab(topo_level_slab, uint32_t); + /* Keep track of the order in which commits are added to our list. */ define_commit_slab(commit_pos, int); static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos); @@ -99,7 +105,7 @@ uint32_t commit_graph_position(const struct commit *c) return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH; } -uint32_t commit_graph_generation(const struct commit *c) +timestamp_t commit_graph_generation(const struct commit *c) { struct commit_graph_data *data = commit_graph_data_slab_peek(&commit_graph_data_slab, c); @@ -139,13 +145,17 @@ static struct commit_graph_data *commit_graph_data_at(const struct commit *c) return data; } +/* + * Should be used only while writing commit-graph as it compares + * generation value of commits by directly accessing commit-slab. + */ static int commit_gen_cmp(const void *va, const void *vb) { const struct commit *a = *(const struct commit **)va; const struct commit *b = *(const struct commit **)vb; - uint32_t generation_a = commit_graph_generation(a); - uint32_t generation_b = commit_graph_generation(b); + const timestamp_t generation_a = commit_graph_data_at(a)->generation; + const timestamp_t generation_b = commit_graph_data_at(b)->generation; /* lower generation commits first */ if (generation_a < generation_b) return -1; @@ -205,16 +215,24 @@ static int commit_graph_compatible(struct repository *r) if (read_replace_refs) { prepare_replace_object(r); - if (hashmap_get_size(&r->objects->replace_map->map)) + if (hashmap_get_size(&r->objects->replace_map->map)) { + warning(_("repository contains replace objects; " + "skipping commit-graph")); return 0; + } } prepare_commit_graft(r); if (r->parsed_objects && - (r->parsed_objects->grafts_nr || r->parsed_objects->substituted_parent)) + (r->parsed_objects->grafts_nr || r->parsed_objects->substituted_parent)) { + warning(_("repository contains (deprecated) grafts; " + "skipping commit-graph")); return 0; - if (is_repository_shallow(r)) + } + if (is_repository_shallow(r)) { + warning(_("repository is shallow; skipping commit-graph")); return 0; + } return 1; } @@ -388,6 +406,20 @@ struct commit_graph *parse_commit_graph(struct repository *r, graph->chunk_commit_data = data + chunk_offset; break; + case GRAPH_CHUNKID_GENERATION_DATA: + if (graph->chunk_generation_data) + chunk_repeated = 1; + else + graph->chunk_generation_data = data + chunk_offset; + break; + + case GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW: + if (graph->chunk_generation_data_overflow) + chunk_repeated = 1; + else + graph->chunk_generation_data_overflow = data + chunk_offset; + break; + case GRAPH_CHUNKID_EXTRAEDGES: if (graph->chunk_extra_edges) chunk_repeated = 1; @@ -590,6 +622,31 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, return graph_chain; } +/* + * returns 1 if and only if all graphs in the chain have + * corrected commit dates stored in the generation_data chunk. + */ +static int validate_mixed_generation_chain(struct commit_graph *g) +{ + int read_generation_data = 1; + struct commit_graph *p = g; + + while (read_generation_data && p) { + read_generation_data = p->read_generation_data; + p = p->base_graph; + } + + if (read_generation_data) + return 1; + + while (g) { + g->read_generation_data = 0; + g = g->base_graph; + } + + return 0; +} + struct commit_graph *read_commit_graph_one(struct repository *r, struct object_directory *odb) { @@ -598,6 +655,8 @@ struct commit_graph *read_commit_graph_one(struct repository *r, if (!g) g = load_commit_graph_chain(r, odb); + validate_mixed_generation_chain(g); + return g; } @@ -673,6 +732,20 @@ int generation_numbers_enabled(struct repository *r) return !!first_generation; } +int corrected_commit_dates_enabled(struct repository *r) +{ + struct commit_graph *g; + if (!prepare_commit_graph(r)) + return 0; + + g = r->objects->commit_graph; + + if (!g->num_commits) + return 0; + + return g->read_generation_data; +} + struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r) { struct commit_graph *g = r->objects->commit_graph; @@ -748,17 +821,41 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, { const unsigned char *commit_data; struct commit_graph_data *graph_data; - uint32_t lex_index; + uint32_t lex_index, offset_pos; + uint64_t date_high, date_low, offset; while (pos < g->num_commits_in_base) g = g->base_graph; + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + lex_index = pos - g->num_commits_in_base; commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; graph_data = commit_graph_data_at(item); graph_data->graph_pos = pos; - graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + + date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; + date_low = get_be32(commit_data + g->hash_len + 12); + item->date = (timestamp_t)((date_high << 32) | date_low); + + if (g->read_generation_data) { + offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index); + + if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) { + if (!g->chunk_generation_data_overflow) + die(_("commit-graph requires overflow generation data but has none")); + + offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW; + graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos); + } else + graph_data->generation = item->date + offset; + } else + graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + + if (g->topo_levels) + *topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2; } static inline void set_commit_tree(struct commit *c, struct tree *t) @@ -772,38 +869,22 @@ static int fill_commit_in_graph(struct repository *r, { uint32_t edge_value; uint32_t *parent_data_ptr; - uint64_t date_low, date_high; struct commit_list **pptr; - struct commit_graph_data *graph_data; const unsigned char *commit_data; uint32_t lex_index; while (pos < g->num_commits_in_base) g = g->base_graph; - if (pos >= g->num_commits + g->num_commits_in_base) - die(_("invalid commit position. commit-graph is likely corrupt")); + fill_commit_graph_info(item, g, pos); - /* - * Store the "full" position, but then use the - * "local" position for the rest of the calculation. - */ - graph_data = commit_graph_data_at(item); - graph_data->graph_pos = pos; lex_index = pos - g->num_commits_in_base; - commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index; item->object.parsed = 1; set_commit_tree(item, NULL); - date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; - date_low = get_be32(commit_data + g->hash_len + 12); - item->date = (timestamp_t)((date_high << 32) | date_low); - - graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2; - pptr = &item->parents; edge_value = get_be32(commit_data + g->hash_len); @@ -943,6 +1024,7 @@ struct write_commit_graph_context { struct oid_array oids; struct packed_commit_list commits; int num_extra_edges; + int num_generation_data_overflows; unsigned long approx_nr_objects; struct progress *progress; int progress_done; @@ -961,8 +1043,11 @@ struct write_commit_graph_context { report_progress:1, split:1, changed_paths:1, - order_by_pack:1; + order_by_pack:1, + write_generation_data:1, + trust_generation_numbers:1; + struct topo_level_slab *topo_levels; const struct commit_graph_opts *opts; size_t total_bloom_filter_data_size; const struct bloom_filter_settings *bloom_settings; @@ -1032,7 +1117,7 @@ static int write_graph_chunk_data(struct hashfile *f, uint32_t packedDate[2]; display_progress(ctx->progress, ++ctx->progress_cnt); - if (parse_commit_no_graph(*list)) + if (repo_parse_commit_no_graph(ctx->r, *list)) die(_("unable to parse commit %s"), oid_to_hex(&(*list)->object.oid)); tree = get_commit_tree_oid(*list); @@ -1109,7 +1194,7 @@ static int write_graph_chunk_data(struct hashfile *f, else packedDate[0] = 0; - packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2); + packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2); packedDate[1] = htonl((*list)->date); hashwrite(f, packedDate, 8); @@ -1120,6 +1205,47 @@ static int write_graph_chunk_data(struct hashfile *f, return 0; } +static int write_graph_chunk_generation_data(struct hashfile *f, + struct write_commit_graph_context *ctx) +{ + int i, num_generation_data_overflows = 0; + + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + timestamp_t offset; + repo_parse_commit(ctx->r, c); + offset = commit_graph_data_at(c)->generation - c->date; + display_progress(ctx->progress, ++ctx->progress_cnt); + + if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { + offset = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | num_generation_data_overflows; + num_generation_data_overflows++; + } + + hashwrite_be32(f, offset); + } + + return 0; +} + +static int write_graph_chunk_generation_data_overflow(struct hashfile *f, + struct write_commit_graph_context *ctx) +{ + int i; + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + timestamp_t offset = commit_graph_data_at(c)->generation - c->date; + display_progress(ctx->progress, ++ctx->progress_cnt); + + if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { + hashwrite_be32(f, offset >> 32); + hashwrite_be32(f, (uint32_t) offset); + } + } + + return 0; +} + static int write_graph_chunk_extra_edges(struct hashfile *f, struct write_commit_graph_context *ctx) { @@ -1306,11 +1432,11 @@ static void close_reachable(struct write_commit_graph_context *ctx) if (!commit) continue; if (ctx->split) { - if ((!parse_commit(commit) && + if ((!repo_parse_commit(ctx->r, commit) && commit_graph_position(commit) == COMMIT_NOT_FROM_GRAPH) || flags == COMMIT_GRAPH_SPLIT_REPLACE) add_missing_parents(ctx, commit); - } else if (!parse_commit_no_graph(commit)) + } else if (!repo_parse_commit_no_graph(ctx->r, commit)) add_missing_parents(ctx, commit); } stop_progress(&ctx->progress); @@ -1329,6 +1455,59 @@ static void close_reachable(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static void compute_topological_levels(struct write_commit_graph_context *ctx) +{ + int i; + struct commit_list *list = NULL; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Computing commit graph topological levels"), + ctx->commits.nr); + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + uint32_t level; + + repo_parse_commit(ctx->r, c); + level = *topo_level_slab_at(ctx->topo_levels, c); + + display_progress(ctx->progress, i + 1); + if (level != GENERATION_NUMBER_ZERO) + continue; + + commit_list_insert(c, &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + uint32_t max_level = 0; + + for (parent = current->parents; parent; parent = parent->next) { + repo_parse_commit(ctx->r, parent->item); + level = *topo_level_slab_at(ctx->topo_levels, parent->item); + + if (level == GENERATION_NUMBER_ZERO) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } + + if (level > max_level) + max_level = level; + } + + if (all_parents_computed) { + pop_commit(&list); + + if (max_level > GENERATION_NUMBER_V1_MAX - 1) + max_level = GENERATION_NUMBER_V1_MAX - 1; + *topo_level_slab_at(ctx->topo_levels, current) = max_level + 1; + } + } + } + stop_progress(&ctx->progress); +} + static void compute_generation_numbers(struct write_commit_graph_context *ctx) { int i; @@ -1338,42 +1517,56 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) ctx->progress = start_delayed_progress( _("Computing commit graph generation numbers"), ctx->commits.nr); + + if (!ctx->trust_generation_numbers) { + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + repo_parse_commit(ctx->r, c); + commit_graph_data_at(c)->generation = GENERATION_NUMBER_ZERO; + } + } + for (i = 0; i < ctx->commits.nr; i++) { - uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation; + struct commit *c = ctx->commits.list[i]; + timestamp_t corrected_commit_date; + + repo_parse_commit(ctx->r, c); + corrected_commit_date = commit_graph_data_at(c)->generation; display_progress(ctx->progress, i + 1); - if (generation != GENERATION_NUMBER_INFINITY && - generation != GENERATION_NUMBER_ZERO) + if (corrected_commit_date != GENERATION_NUMBER_ZERO) continue; - commit_list_insert(ctx->commits.list[i], &list); + commit_list_insert(c, &list); while (list) { struct commit *current = list->item; struct commit_list *parent; int all_parents_computed = 1; - uint32_t max_generation = 0; + timestamp_t max_corrected_commit_date = 0; for (parent = current->parents; parent; parent = parent->next) { - generation = commit_graph_data_at(parent->item)->generation; + repo_parse_commit(ctx->r, parent->item); + corrected_commit_date = commit_graph_data_at(parent->item)->generation; - if (generation == GENERATION_NUMBER_INFINITY || - generation == GENERATION_NUMBER_ZERO) { + if (corrected_commit_date == GENERATION_NUMBER_ZERO) { all_parents_computed = 0; commit_list_insert(parent->item, &list); break; - } else if (generation > max_generation) { - max_generation = generation; } + + if (corrected_commit_date > max_corrected_commit_date) + max_corrected_commit_date = corrected_commit_date; } if (all_parents_computed) { - struct commit_graph_data *data = commit_graph_data_at(current); - - data->generation = max_generation + 1; pop_commit(&list); - if (data->generation > GENERATION_NUMBER_MAX) - data->generation = GENERATION_NUMBER_MAX; + if (current->date && current->date > max_corrected_commit_date) + max_corrected_commit_date = current->date - 1; + commit_graph_data_at(current)->generation = max_corrected_commit_date + 1; + + if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX) + ctx->num_generation_data_overflows++; } } } @@ -1593,9 +1786,9 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) continue; if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE) - parse_commit(ctx->commits.list[ctx->commits.nr]); + repo_parse_commit(ctx->r, ctx->commits.list[ctx->commits.nr]); else - parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); + repo_parse_commit_no_graph(ctx->r, ctx->commits.list[ctx->commits.nr]); num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents); if (num_parents > 2) @@ -1707,6 +1900,21 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunks[2].id = GRAPH_CHUNKID_DATA; chunks[2].size = (hashsz + 16) * ctx->commits.nr; chunks[2].write_fn = write_graph_chunk_data; + + if (git_env_bool(GIT_TEST_COMMIT_GRAPH_NO_GDAT, 0)) + ctx->write_generation_data = 0; + if (ctx->write_generation_data) { + chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA; + chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; + chunks[num_chunks].write_fn = write_graph_chunk_generation_data; + num_chunks++; + } + if (ctx->num_generation_data_overflows) { + chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW; + chunks[num_chunks].size = sizeof(timestamp_t) * ctx->num_generation_data_overflows; + chunks[num_chunks].write_fn = write_graph_chunk_generation_data_overflow; + num_chunks++; + } if (ctx->num_extra_edges) { chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; chunks[num_chunks].size = 4 * ctx->num_extra_edges; @@ -1918,6 +2126,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) if (i < ctx->num_commit_graphs_after) ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + /* + * If the topmost remaining layer has generation data chunk, the + * resultant layer also has generation data chunk. + */ + if (i == ctx->num_commit_graphs_after - 2) + ctx->write_generation_data = !!g->chunk_generation_data; + i--; g = g->base_graph; } @@ -2109,6 +2324,7 @@ int write_commit_graph(struct object_directory *odb, int res = 0; int replace = 0; struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; + struct topo_level_slab topo_levels; prepare_repo_settings(the_repository); if (!the_repository->settings.core_commit_graph) { @@ -2126,6 +2342,8 @@ int write_commit_graph(struct object_directory *odb, ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; ctx->opts = opts; ctx->total_bloom_filter_data_size = 0; + ctx->write_generation_data = 1; + ctx->num_generation_data_overflows = 0; bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", bloom_settings.bits_per_entry); @@ -2135,11 +2353,23 @@ int write_commit_graph(struct object_directory *odb, bloom_settings.max_changed_paths); ctx->bloom_settings = &bloom_settings; + init_topo_level_slab(&topo_levels); + ctx->topo_levels = &topo_levels; + + prepare_commit_graph(ctx->r); + if (ctx->r->objects->commit_graph) { + struct commit_graph *g = ctx->r->objects->commit_graph; + + while (g) { + g->topo_levels = &topo_levels; + g = g->base_graph; + } + } + if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) ctx->changed_paths = 1; if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { struct commit_graph *g; - prepare_commit_graph_one(ctx->r, ctx->odb); g = ctx->r->objects->commit_graph; @@ -2151,10 +2381,7 @@ int write_commit_graph(struct object_directory *odb, } if (ctx->split) { - struct commit_graph *g; - prepare_commit_graph(ctx->r); - - g = ctx->r->objects->commit_graph; + struct commit_graph *g = ctx->r->objects->commit_graph; while (g) { ctx->num_commit_graphs_before++; @@ -2178,9 +2405,6 @@ int write_commit_graph(struct object_directory *odb, ctx->approx_nr_objects = approximate_object_count(); - if (ctx->append) - prepare_commit_graph_one(ctx->r, ctx->odb); - if (ctx->append && ctx->r->objects->commit_graph) { struct commit_graph *g = ctx->r->objects->commit_graph; for (i = 0; i < g->num_commits; i++) { @@ -2227,7 +2451,11 @@ int write_commit_graph(struct object_directory *odb, } else ctx->num_commit_graphs_after = 1; - compute_generation_numbers(ctx); + ctx->trust_generation_numbers = validate_mixed_generation_chain(ctx->r->objects->commit_graph); + + compute_topological_levels(ctx); + if (ctx->write_generation_data) + compute_generation_numbers(ctx); if (ctx->changed_paths) compute_bloom_filters(ctx); @@ -2355,8 +2583,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit, *odb_commit; struct commit_list *graph_parents, *odb_parents; - uint32_t max_generation = 0; - uint32_t generation; + timestamp_t max_generation = 0; + timestamp_t generation; display_progress(progress, i + 1); hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); @@ -2420,16 +2648,17 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) continue; /* - * If one of our parents has generation GENERATION_NUMBER_MAX, then - * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid - * extra logic in the following condition. + * If we are using topological level and one of our parents has + * generation GENERATION_NUMBER_V1_MAX, then our generation is + * also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic + * in the following condition. */ - if (max_generation == GENERATION_NUMBER_MAX) + if (!g->read_generation_data && max_generation == GENERATION_NUMBER_V1_MAX) max_generation--; generation = commit_graph_generation(graph_commit); - if (generation != max_generation + 1) - graph_report(_("commit-graph generation for commit %s is %u != %u"), + if (generation < max_generation + 1) + graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime), oid_to_hex(&cur_oid), generation, max_generation + 1); |