diff options
author | Junio C Hamano <gitster@pobox.com> | 2020-03-26 17:11:20 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2020-03-26 17:11:20 -0700 |
commit | f8cb64e3d4d512a86c1b7b3aa584f11740b3d038 (patch) | |
tree | a679837356ebc7f598a0073a33ee0aea43edcd1e | |
parent | Merge branch 'pb/recurse-submodules-fix' (diff) | |
parent | fast-import: add options for rewriting submodules (diff) | |
download | tgif-f8cb64e3d4d512a86c1b7b3aa584f11740b3d038.tar.xz |
Merge branch 'bc/sha-256-part-1-of-4'
SHA-256 transition continues.
* bc/sha-256-part-1-of-4: (22 commits)
fast-import: add options for rewriting submodules
fast-import: add a generic function to iterate over marks
fast-import: make find_marks work on any mark set
fast-import: add helper function for inserting mark object entries
fast-import: permit reading multiple marks files
commit: use expected signature header for SHA-256
worktree: allow repository version 1
init-db: move writing repo version into a function
builtin/init-db: add environment variable for new repo hash
builtin/init-db: allow specifying hash algorithm on command line
setup: allow check_repository_format to read repository format
t/helper: make repository tests hash independent
t/helper: initialize repository if necessary
t/helper/test-dump-split-index: initialize git repository
t6300: make hash algorithm independent
t6300: abstract away SHA-1-specific constants
t: use hash-specific lookup tables to define test constants
repository: require a build flag to use SHA-256
hex: add functions to parse hex object IDs in any algorithm
hex: introduce parsing variants taking hash algorithms
...
-rw-r--r-- | Documentation/git-fast-import.txt | 20 | ||||
-rw-r--r-- | Documentation/git-init.txt | 7 | ||||
-rw-r--r-- | Documentation/git.txt | 6 | ||||
-rw-r--r-- | builtin/clone.c | 2 | ||||
-rw-r--r-- | builtin/commit.c | 2 | ||||
-rw-r--r-- | builtin/init-db.c | 75 | ||||
-rw-r--r-- | builtin/pack-objects.c | 2 | ||||
-rw-r--r-- | cache.h | 25 | ||||
-rw-r--r-- | commit.c | 30 | ||||
-rw-r--r-- | config.mak.dev | 2 | ||||
-rw-r--r-- | csum-file.c | 2 | ||||
-rw-r--r-- | fast-import.c | 246 | ||||
-rw-r--r-- | hash.h | 21 | ||||
-rw-r--r-- | hex.c | 55 | ||||
-rw-r--r-- | path.c | 2 | ||||
-rw-r--r-- | repository.c | 4 | ||||
-rw-r--r-- | sequencer.c | 2 | ||||
-rw-r--r-- | setup.c | 6 | ||||
-rw-r--r-- | sha1-file.c | 18 | ||||
-rw-r--r-- | sha256/gcrypt.h | 6 | ||||
-rw-r--r-- | t/helper/test-dump-split-index.c | 2 | ||||
-rw-r--r-- | t/helper/test-repository.c | 14 | ||||
-rwxr-xr-x | t/t1450-fsck.sh | 24 | ||||
-rwxr-xr-x | t/t6300-for-each-ref.sh | 27 | ||||
-rwxr-xr-x | t/t7510-signed-commit.sh | 16 | ||||
-rwxr-xr-x | t/t9300-fast-import.sh | 109 | ||||
-rw-r--r-- | t/test-lib.sh | 29 | ||||
-rw-r--r-- | worktree.c | 10 |
28 files changed, 623 insertions, 141 deletions
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 7889f95940..77c6b3d001 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -122,6 +122,26 @@ Locations of Marks Files Relative and non-relative marks may be combined by interweaving --(no-)relative-marks with the --(import|export)-marks= options. +Submodule Rewriting +~~~~~~~~~~~~~~~~~~~ + +--rewrite-submodules-from=<name>:<file>:: +--rewrite-submodules-to=<name>:<file>:: + Rewrite the object IDs for the submodule specified by <name> from the values + used in the from <file> to those used in the to <file>. The from marks should + have been created by `git fast-export`, and the to marks should have been + created by `git fast-import` when importing that same submodule. ++ +<name> may be any arbitrary string not containing a colon character, but the +same value must be used with both options when specifying corresponding marks. +Multiple submodules may be specified with different values for <name>. It is an +error not to use these options in corresponding pairs. ++ +These options are primarily useful when converting a repository from one hash +algorithm to another; without them, fast-import will fail if it encounters a +submodule because it has no way of writing the object ID into the new hash +algorithm. + Performance and Compression Tuning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/git-init.txt b/Documentation/git-init.txt index 32880aafb0..adc6adfd38 100644 --- a/Documentation/git-init.txt +++ b/Documentation/git-init.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git init' [-q | --quiet] [--bare] [--template=<template_directory>] - [--separate-git-dir <git dir>] + [--separate-git-dir <git dir>] [--object-format=<format>] [--shared[=<permissions>]] [directory] @@ -48,6 +48,11 @@ Only print error and warning messages; all other output will be suppressed. Create a bare repository. 
If `GIT_DIR` environment is not set, it is set to the current working directory. +--object-format=<format>:: + +Specify the given object format (hash algorithm) for the repository. The valid +values are 'sha1' and (if enabled) 'sha256'. 'sha1' is the default. + --template=<template_directory>:: Specify the directory from which templates will be used. (See the "TEMPLATE diff --git a/Documentation/git.txt b/Documentation/git.txt index b0672bd806..9d6769e95a 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -493,6 +493,12 @@ double-quotes and respecting backslash escapes. E.g., the value details. This variable has lower precedence than other path variables such as GIT_INDEX_FILE, GIT_OBJECT_DIRECTORY... +`GIT_DEFAULT_HASH`:: + If this variable is set, the default hash algorithm for new + repositories will be set to this value. This value is currently + ignored when cloning; the setting of the remote repository + is used instead. The default is "sha1". + Git Commits ~~~~~~~~~~~ `GIT_AUTHOR_NAME`:: diff --git a/builtin/clone.c b/builtin/clone.c index 488bdb0741..46573b9b7c 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1106,7 +1106,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } } - init_db(git_dir, real_git_dir, option_template, INIT_DB_QUIET); + init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, INIT_DB_QUIET); if (real_git_dir) git_dir = real_git_dir; diff --git a/builtin/commit.c b/builtin/commit.c index 5f379e4807..d3e7781e65 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1667,7 +1667,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) } if (amend) { - const char *exclude_gpgsig[2] = { "gpgsig", NULL }; + const char *exclude_gpgsig[3] = { "gpgsig", "gpgsig-sha256", NULL }; extra = read_commit_extra_headers(current_head, exclude_gpgsig); } else { struct commit_extra_header **tail = &extra; diff --git a/builtin/init-db.c b/builtin/init-db.c index 5bf61a7e05..0b7222e718 
100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -20,6 +20,8 @@ #define TEST_FILEMODE 1 #endif +#define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH" + static int init_is_bare_repository = 0; static int init_shared_repository = -1; static const char *init_db_template_dir; @@ -176,13 +178,36 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree) return 1; } +void initialize_repository_version(int hash_algo) +{ + char repo_version_string[10]; + int repo_version = GIT_REPO_VERSION; + +#ifndef ENABLE_SHA256 + if (hash_algo != GIT_HASH_SHA1) + die(_("The hash algorithm %s is not supported in this build."), hash_algos[hash_algo].name); +#endif + + if (hash_algo != GIT_HASH_SHA1) + repo_version = GIT_REPO_VERSION_READ; + + /* This forces creation of new config file */ + xsnprintf(repo_version_string, sizeof(repo_version_string), + "%d", repo_version); + git_config_set("core.repositoryformatversion", repo_version_string); + + if (hash_algo != GIT_HASH_SHA1) + git_config_set("extensions.objectformat", + hash_algos[hash_algo].name); +} + static int create_default_files(const char *template_path, - const char *original_git_dir) + const char *original_git_dir, + const struct repository_format *fmt) { struct stat st1; struct strbuf buf = STRBUF_INIT; char *path; - char repo_version_string[10]; char junk[2]; int reinit; int filemode; @@ -244,10 +269,7 @@ static int create_default_files(const char *template_path, exit(1); } - /* This forces creation of new config file */ - xsnprintf(repo_version_string, sizeof(repo_version_string), - "%d", GIT_REPO_VERSION); - git_config_set("core.repositoryformatversion", repo_version_string); + initialize_repository_version(fmt->hash_algo); /* Check filemode trustability */ path = git_path_buf(&buf, "config"); @@ -340,12 +362,33 @@ static void separate_git_dir(const char *git_dir, const char *git_link) write_file(git_link, "gitdir: %s", git_dir); } +static void validate_hash_algorithm(struct 
repository_format *repo_fmt, int hash) +{ + const char *env = getenv(GIT_DEFAULT_HASH_ENVIRONMENT); + /* + * If we already have an initialized repo, don't allow the user to + * specify a different algorithm, as that could cause corruption. + * Otherwise, if the user has specified one on the command line, use it. + */ + if (repo_fmt->version >= 0 && hash != GIT_HASH_UNKNOWN && hash != repo_fmt->hash_algo) + die(_("attempt to reinitialize repository with different hash")); + else if (hash != GIT_HASH_UNKNOWN) + repo_fmt->hash_algo = hash; + else if (env) { + int env_algo = hash_algo_by_name(env); + if (env_algo == GIT_HASH_UNKNOWN) + die(_("unknown hash algorithm '%s'"), env); + repo_fmt->hash_algo = env_algo; + } +} + int init_db(const char *git_dir, const char *real_git_dir, - const char *template_dir, unsigned int flags) + const char *template_dir, int hash, unsigned int flags) { int reinit; int exist_ok = flags & INIT_DB_EXIST_OK; char *original_git_dir = real_pathdup(git_dir, 1); + struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; if (real_git_dir) { struct stat st; @@ -378,9 +421,11 @@ int init_db(const char *git_dir, const char *real_git_dir, * config file, so this will not fail. What we are catching * is an attempt to reinitialize new repository with an old tool. 
*/ - check_repository_format(); + check_repository_format(&repo_fmt); - reinit = create_default_files(template_dir, original_git_dir); + validate_hash_algorithm(&repo_fmt, hash); + + reinit = create_default_files(template_dir, original_git_dir, &repo_fmt); create_object_directory(); @@ -482,6 +527,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) const char *work_tree; const char *template_dir = NULL; unsigned int flags = 0; + const char *object_format = NULL; + int hash_algo = GIT_HASH_UNKNOWN; const struct option init_db_options[] = { OPT_STRING(0, "template", &template_dir, N_("template-directory"), N_("directory from which templates will be used")), @@ -494,6 +541,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) OPT_BIT('q', "quiet", &flags, N_("be quiet"), INIT_DB_QUIET), OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"), N_("separate git dir from working tree")), + OPT_STRING(0, "object-format", &object_format, N_("hash"), + N_("specify the hash algorithm to use")), OPT_END() }; @@ -546,6 +595,12 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) free(cwd); } + if (object_format) { + hash_algo = hash_algo_by_name(object_format); + if (hash_algo == GIT_HASH_UNKNOWN) + die(_("unknown hash algorithm '%s'"), object_format); + } + if (init_shared_repository != -1) set_shared_repository(init_shared_repository); @@ -597,5 +652,5 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) UNLEAK(work_tree); flags |= INIT_DB_EXIST_OK; - return init_db(git_dir, real_git_dir, template_dir, flags); + return init_db(git_dir, real_git_dir, template_dir, hash_algo, flags); } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 02aa6ee480..4c2bb170c6 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -880,7 +880,7 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out, len = encode_in_pack_object_header(header, sizeof(header), OBJ_REF_DELTA, size); 
hashwrite(out, header, len); - hashwrite(out, base_oid.hash, 20); + hashwrite(out, base_oid.hash, the_hash_algo->rawsz); copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); return; } @@ -627,7 +627,9 @@ int path_inside_repo(const char *prefix, const char *path); #define INIT_DB_EXIST_OK 0x0002 int init_db(const char *git_dir, const char *real_git_dir, - const char *template_dir, unsigned int flags); + const char *template_dir, int hash_algo, + unsigned int flags); +void initialize_repository_version(int hash_algo); void sanitize_stdfds(void); int daemonize(void); @@ -1086,8 +1088,10 @@ int verify_repository_format(const struct repository_format *format, * and die if it is a version we don't understand. Generally one would * set_git_dir() before calling this, and use it only for "are we in a valid * repo?". + * + * If successful and fmt is not NULL, fill fmt with data. */ -void check_repository_format(void); +void check_repository_format(struct repository_format *fmt); #define MTIME_CHANGED 0x0001 #define CTIME_CHANGED 0x0002 @@ -1479,6 +1483,9 @@ int set_disambiguate_hint_config(const char *var, const char *value); int get_sha1_hex(const char *hex, unsigned char *sha1); int get_oid_hex(const char *hex, struct object_id *sha1); +/* Like get_oid_hex, but for an arbitrary hash algorithm. */ +int get_oid_hex_algop(const char *hex, struct object_id *oid, const struct git_hash_algo *algop); + /* * Read `len` pairs of hexadecimal digits from `hex` and write the * values to `binary` as `len` bytes. Return 0 on success, or -1 if @@ -1514,6 +1521,20 @@ char *oid_to_hex(const struct object_id *oid); /* same static buffer */ */ int parse_oid_hex(const char *hex, struct object_id *oid, const char **end); +/* Like parse_oid_hex, but for an arbitrary hash algorithm. 
*/ +int parse_oid_hex_algop(const char *hex, struct object_id *oid, const char **end, + const struct git_hash_algo *algo); + + +/* + * These functions work like get_oid_hex and parse_oid_hex, but they will parse + * a hex value for any algorithm. The algorithm is detected based on the length + * and the algorithm in use is returned. If this is not a hex object ID in any + * algorithm, returns GIT_HASH_UNKNOWN. + */ +int get_oid_hex_any(const char *hex, struct object_id *oid); +int parse_oid_hex_any(const char *hex, struct object_id *oid, const char **end); + /* * This reads short-hand syntax that not only evaluates to a commit * object name, but also can act as if the end user spelled the name @@ -961,14 +961,22 @@ cleanup_return: return ret; } -static const char gpg_sig_header[] = "gpgsig"; -static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1; +/* + * Indexed by hash algorithm identifier. + */ +static const char *gpg_sig_headers[] = { + NULL, + "gpgsig", + "gpgsig-sha256", +}; static int do_sign_commit(struct strbuf *buf, const char *keyid) { struct strbuf sig = STRBUF_INIT; int inspos, copypos; const char *eoh; + const char *gpg_sig_header = gpg_sig_headers[hash_algo_by_ptr(the_hash_algo)]; + int gpg_sig_header_len = strlen(gpg_sig_header); /* find the end of the header */ eoh = strstr(buf->buf, "\n\n"); @@ -1010,6 +1018,8 @@ int parse_signed_commit(const struct commit *commit, const char *buffer = get_commit_buffer(commit, &size); int in_signature, saw_signature = -1; const char *line, *tail; + const char *gpg_sig_header = gpg_sig_headers[hash_algo_by_ptr(the_hash_algo)]; + int gpg_sig_header_len = strlen(gpg_sig_header); line = buffer; tail = buffer + size; @@ -1056,11 +1066,17 @@ int remove_signature(struct strbuf *buf) if (in_signature && line[0] == ' ') sig_end = next; - else if (starts_with(line, gpg_sig_header) && - line[gpg_sig_header_len] == ' ') { - sig_start = line; - sig_end = next; - in_signature = 1; + else if (starts_with(line, 
"gpgsig")) { + int i; + for (i = 1; i < GIT_HASH_NALGOS; i++) { + const char *p; + if (skip_prefix(line, gpg_sig_headers[i], &p) && + *p == ' ') { + sig_start = line; + sig_end = next; + in_signature = 1; + } + } } else { if (*line == '\n') /* dump the whole remainder of the buffer */ diff --git a/config.mak.dev b/config.mak.dev index 89b218d11a..cd4a82a9eb 100644 --- a/config.mak.dev +++ b/config.mak.dev @@ -16,6 +16,8 @@ DEVELOPER_CFLAGS += -Wstrict-prototypes DEVELOPER_CFLAGS += -Wunused DEVELOPER_CFLAGS += -Wvla +DEVELOPER_CFLAGS += -DENABLE_SHA256 + ifndef COMPILER_FEATURES COMPILER_FEATURES := $(shell ./detect-compiler $(CC)) endif diff --git a/csum-file.c b/csum-file.c index 53ce37f7ca..0f35fa5ee4 100644 --- a/csum-file.c +++ b/csum-file.c @@ -157,7 +157,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo { hashflush(f); checkpoint->offset = f->total; - checkpoint->ctx = f->ctx; + the_hash_algo->clone_fn(&checkpoint->ctx, &f->ctx); } int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint) diff --git a/fast-import.c b/fast-import.c index b8b65a801c..202dda11a6 100644 --- a/fast-import.c +++ b/fast-import.c @@ -18,6 +18,7 @@ #include "object-store.h" #include "mem-pool.h" #include "commit-reach.h" +#include "khash.h" #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1) @@ -53,6 +54,7 @@ struct object_entry_pool { struct mark_set { union { + struct object_id *oids[1024]; struct object_entry *marked[1024]; struct mark_set *sets[1024]; } data; @@ -131,6 +133,9 @@ struct recent_command { char *buf; }; +typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark); +typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp); + /* Configured limits on output */ static unsigned long max_depth = 50; static off_t max_packsize; @@ -222,6 +227,11 @@ static int allow_unsafe_features; /* Signal handling */ static volatile sig_atomic_t checkpoint_requested; 
+/* Submodule marks */ +static struct string_list sub_marks_from = STRING_LIST_INIT_DUP; +static struct string_list sub_marks_to = STRING_LIST_INIT_DUP; +static kh_oid_map_t *sub_oid_map; + /* Where to write output of cat-blob commands */ static int cat_blob_fd = STDOUT_FILENO; @@ -230,6 +240,29 @@ static void parse_get_mark(const char *p); static void parse_cat_blob(const char *p); static void parse_ls(const char *p, struct branch *b); +static void for_each_mark(struct mark_set *m, uintmax_t base, each_mark_fn_t callback, void *p) +{ + uintmax_t k; + if (m->shift) { + for (k = 0; k < 1024; k++) { + if (m->data.sets[k]) + for_each_mark(m->data.sets[k], base + (k << m->shift), callback, p); + } + } else { + for (k = 0; k < 1024; k++) { + if (m->data.marked[k]) + callback(base + k, m->data.marked[k], p); + } + } +} + +static void dump_marks_fn(uintmax_t mark, void *object, void *cbp) { + struct object_entry *e = object; + FILE *f = cbp; + + fprintf(f, ":%" PRIuMAX " %s\n", mark, oid_to_hex(&e->idx.oid)); +} + static void write_branch_report(FILE *rpt, struct branch *b) { fprintf(rpt, "%s:\n", b->name); @@ -258,8 +291,6 @@ static void write_branch_report(FILE *rpt, struct branch *b) fputc('\n', rpt); } -static void dump_marks_helper(FILE *, uintmax_t, struct mark_set *); - static void write_crash_report(const char *err) { char *loc = git_pathdup("fast_import_crash_%"PRIuMAX, (uintmax_t) getpid()); @@ -338,7 +369,7 @@ static void write_crash_report(const char *err) if (export_marks_file) fprintf(rpt, " exported to %s\n", export_marks_file); else - dump_marks_helper(rpt, 0, marks); + for_each_mark(marks, 0, dump_marks_fn, rpt); fputc('\n', rpt); fputs("-------------------\n", rpt); @@ -493,9 +524,8 @@ static char *pool_strdup(const char *s) return r; } -static void insert_mark(uintmax_t idnum, struct object_entry *oe) +static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe) { - struct mark_set *s = marks; while ((idnum >> s->shift) >= 1024) 
{ s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); s->shift = marks->shift + 10; @@ -516,10 +546,9 @@ static void insert_mark(uintmax_t idnum, struct object_entry *oe) s->data.marked[idnum] = oe; } -static struct object_entry *find_mark(uintmax_t idnum) +static void *find_mark(struct mark_set *s, uintmax_t idnum) { uintmax_t orig_idnum = idnum; - struct mark_set *s = marks; struct object_entry *oe = NULL; if ((idnum >> s->shift) < 1024) { while (s && s->shift) { @@ -919,7 +948,7 @@ static int store_object( e = insert_object(&oid); if (mark) - insert_mark(mark, e); + insert_mark(marks, mark, e); if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; @@ -1117,7 +1146,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) e = insert_object(&oid); if (mark) - insert_mark(mark, e); + insert_mark(marks, mark, e); if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; @@ -1655,26 +1684,6 @@ static void dump_tags(void) strbuf_release(&err); } -static void dump_marks_helper(FILE *f, - uintmax_t base, - struct mark_set *m) -{ - uintmax_t k; - if (m->shift) { - for (k = 0; k < 1024; k++) { - if (m->data.sets[k]) - dump_marks_helper(f, base + (k << m->shift), - m->data.sets[k]); - } - } else { - for (k = 0; k < 1024; k++) { - if (m->data.marked[k]) - fprintf(f, ":%" PRIuMAX " %s\n", base + k, - oid_to_hex(&m->data.marked[k]->idx.oid)); - } - } -} - static void dump_marks(void) { struct lock_file mark_lock = LOCK_INIT; @@ -1704,7 +1713,7 @@ static void dump_marks(void) return; } - dump_marks_helper(f, 0, marks); + for_each_mark(marks, 0, dump_marks_fn, f); if (commit_lock_file(&mark_lock)) { failure |= error_errno("Unable to write file %s", export_marks_file); @@ -1712,21 +1721,38 @@ static void dump_marks(void) } } -static void read_marks(void) +static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark) +{ + struct object_entry *e; + e = find_object(oid); + if (!e) { + enum object_type 
type = oid_object_info(the_repository, + oid, NULL); + if (type < 0) + die("object not found: %s", oid_to_hex(oid)); + e = insert_object(oid); + e->type = type; + e->pack_id = MAX_PACK_ID; + e->idx.offset = 1; /* just not zero! */ + } + insert_mark(s, mark, e); +} + +static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark) +{ + insert_mark(s, mark, xmemdupz(oid, sizeof(*oid))); +} + +static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter) { char line[512]; - FILE *f = fopen(import_marks_file, "r"); - if (f) - ; - else if (import_marks_file_ignore_missing && errno == ENOENT) - goto done; /* Marks file does not exist */ - else - die_errno("cannot read '%s'", import_marks_file); while (fgets(line, sizeof(line), f)) { uintmax_t mark; char *end; struct object_id oid; - struct object_entry *e; + + /* Ensure SHA-1 objects are padded with zeros. */ + memset(oid.hash, 0, sizeof(oid.hash)); end = strchr(line, '\n'); if (line[0] != ':' || !end) @@ -1734,21 +1760,23 @@ static void read_marks(void) *end = 0; mark = strtoumax(line + 1, &end, 10); if (!mark || end == line + 1 - || *end != ' ' || get_oid_hex(end + 1, &oid)) + || *end != ' ' + || get_oid_hex_any(end + 1, &oid) == GIT_HASH_UNKNOWN) die("corrupt mark line: %s", line); - e = find_object(&oid); - if (!e) { - enum object_type type = oid_object_info(the_repository, - &oid, NULL); - if (type < 0) - die("object not found: %s", oid_to_hex(&oid)); - e = insert_object(&oid); - e->type = type; - e->pack_id = MAX_PACK_ID; - e->idx.offset = 1; /* just not zero! 
*/ - } - insert_mark(mark, e); + inserter(s, &oid, mark); } +} + +static void read_marks(void) +{ + FILE *f = fopen(import_marks_file, "r"); + if (f) + ; + else if (import_marks_file_ignore_missing && errno == ENOENT) + goto done; /* Marks file does not exist */ + else + die_errno("cannot read '%s'", import_marks_file); + read_mark_file(marks, f, insert_object_entry); fclose(f); done: import_marks_file_done = 1; @@ -2134,6 +2162,30 @@ static uintmax_t change_note_fanout(struct tree_entry *root, return do_change_note_fanout(root, root, hex_oid, 0, path, 0, fanout); } +static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end) +{ + int algo; + khiter_t it; + + /* Make SHA-1 object IDs have all-zero padding. */ + memset(oid->hash, 0, sizeof(oid->hash)); + + algo = parse_oid_hex_any(hex, oid, end); + if (algo == GIT_HASH_UNKNOWN) + return -1; + + it = kh_get_oid_map(sub_oid_map, *oid); + /* No such object? */ + if (it == kh_end(sub_oid_map)) { + /* If we're using the same algorithm, pass it through. */ + if (hash_algos[algo].format_id == the_hash_algo->format_id) + return 0; + return -1; + } + oidcpy(oid, kh_value(sub_oid_map, it)); + return 0; +} + /* * Given a pointer into a string, parse a mark reference: * @@ -2214,13 +2266,13 @@ static void file_change_m(const char *p, struct branch *b) } if (*p == ':') { - oe = find_mark(parse_mark_ref_space(&p)); + oe = find_mark(marks, parse_mark_ref_space(&p)); oidcpy(&oid, &oe->idx.oid); } else if (skip_prefix(p, "inline ", &p)) { inline_data = 1; oe = NULL; /* not used with inline_data, but makes gcc happy */ } else { - if (parse_oid_hex(p, &oid, &p)) + if (parse_mapped_oid_hex(p, &oid, &p)) die("Invalid dataref: %s", command_buf.buf); oe = find_object(&oid); if (*p++ != ' ') @@ -2388,13 +2440,13 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa /* Now parse the notemodify command. 
*/ /* <dataref> or 'inline' */ if (*p == ':') { - oe = find_mark(parse_mark_ref_space(&p)); + oe = find_mark(marks, parse_mark_ref_space(&p)); oidcpy(&oid, &oe->idx.oid); } else if (skip_prefix(p, "inline ", &p)) { inline_data = 1; oe = NULL; /* not used with inline_data, but makes gcc happy */ } else { - if (parse_oid_hex(p, &oid, &p)) + if (parse_mapped_oid_hex(p, &oid, &p)) die("Invalid dataref: %s", command_buf.buf); oe = find_object(&oid); if (*p++ != ' ') @@ -2409,7 +2461,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa oidcpy(&commit_oid, &s->oid); } else if (*p == ':') { uintmax_t commit_mark = parse_mark_ref_eol(p); - struct object_entry *commit_oe = find_mark(commit_mark); + struct object_entry *commit_oe = find_mark(marks, commit_mark); if (commit_oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", commit_mark); oidcpy(&commit_oid, &commit_oe->idx.oid); @@ -2513,7 +2565,7 @@ static int parse_objectish(struct branch *b, const char *objectish) oidcpy(&b->branch_tree.versions[1].oid, t); } else if (*objectish == ':') { uintmax_t idnum = parse_mark_ref_eol(objectish); - struct object_entry *oe = find_mark(idnum); + struct object_entry *oe = find_mark(marks, idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); if (!oideq(&b->oid, &oe->idx.oid)) { @@ -2577,7 +2629,7 @@ static struct hash_list *parse_merge(unsigned int *count) oidcpy(&n->oid, &s->oid); else if (*from == ':') { uintmax_t idnum = parse_mark_ref_eol(from); - struct object_entry *oe = find_mark(idnum); + struct object_entry *oe = find_mark(marks, idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); oidcpy(&n->oid, &oe->idx.oid); @@ -2751,7 +2803,7 @@ static void parse_new_tag(const char *arg) } else if (*from == ':') { struct object_entry *oe; from_mark = parse_mark_ref_eol(from); - oe = find_mark(from_mark); + oe = find_mark(marks, from_mark); type = oe->type; oidcpy(&oid, &oe->idx.oid); } 
else if (!get_oid(from, &oid)) { @@ -2909,7 +2961,7 @@ static void parse_get_mark(const char *p) if (*p != ':') die("Not a mark: %s", p); - oe = find_mark(parse_mark_ref_eol(p)); + oe = find_mark(marks, parse_mark_ref_eol(p)); if (!oe) die("Unknown mark: %s", command_buf.buf); @@ -2924,12 +2976,12 @@ static void parse_cat_blob(const char *p) /* cat-blob SP <object> LF */ if (*p == ':') { - oe = find_mark(parse_mark_ref_eol(p)); + oe = find_mark(marks, parse_mark_ref_eol(p)); if (!oe) die("Unknown mark: %s", command_buf.buf); oidcpy(&oid, &oe->idx.oid); } else { - if (parse_oid_hex(p, &oid, &p)) + if (parse_mapped_oid_hex(p, &oid, &p)) die("Invalid dataref: %s", command_buf.buf); if (*p) die("Garbage after SHA1: %s", command_buf.buf); @@ -2993,18 +3045,54 @@ static struct object_entry *dereference(struct object_entry *oe, return find_object(oid); } +static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp) +{ + struct object_id *fromoid = object; + struct object_id *tooid = find_mark(cbp, mark); + int ret; + khiter_t it; + + it = kh_put_oid_map(sub_oid_map, *fromoid, &ret); + |