diff options
Diffstat (limited to 'fast-import.c')
-rw-r--r-- | fast-import.c | 181 |
1 files changed, 122 insertions, 59 deletions
diff --git a/fast-import.c b/fast-import.c index 1d5e3336a5..350b2e9e10 100644 --- a/fast-import.c +++ b/fast-import.c @@ -170,6 +170,11 @@ Format of STDIN stream: #define DEPTH_BITS 13 #define MAX_DEPTH ((1<<DEPTH_BITS)-1) +/* + * We abuse the setuid bit on directories to mean "do not delta". + */ +#define NO_DELTA S_ISUID + struct object_entry { struct pack_idx_entry idx; struct object_entry *next; @@ -284,6 +289,7 @@ static uintmax_t marks_set_count; static uintmax_t object_count_by_type[1 << TYPE_BITS]; static uintmax_t duplicate_count_by_type[1 << TYPE_BITS]; static uintmax_t delta_count_by_type[1 << TYPE_BITS]; +static uintmax_t delta_count_attempts_by_type[1 << TYPE_BITS]; static unsigned long object_count; static unsigned long branch_count; static unsigned long branch_load_count; @@ -304,6 +310,7 @@ static unsigned int atom_cnt; static struct atom_str **atom_table; /* The .pack file being generated */ +static struct pack_idx_option pack_idx_opts; static unsigned int pack_id; static struct sha1file *pack_file; static struct packed_git *pack_data; @@ -354,6 +361,7 @@ static unsigned int cmd_save = 100; static uintmax_t next_mark; static struct strbuf new_data = STRBUF_INIT; static int seen_data_command; +static int require_explicit_termination; /* Signal handling */ static volatile sig_atomic_t checkpoint_requested; @@ -714,13 +722,8 @@ static struct branch *new_branch(const char *name) if (b) die("Invalid attempt to create duplicate branch: %s", name); - switch (check_ref_format(name)) { - case 0: break; /* its valid */ - case CHECK_REF_FORMAT_ONELEVEL: - break; /* valid, but too few '/', allow anyway */ - default: + if (check_refname_format(name, REFNAME_ALLOW_ONELEVEL)) die("Branch name doesn't conform to GIT standards: %s", name); - } b = pool_calloc(1, sizeof(struct branch)); b->name = pool_strdup(name); @@ -896,7 +899,7 @@ static const char *create_index(void) if (c != last) die("internal consistency error creating the index"); - tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1); + tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, pack_data->sha1); free(idx); return tmpfile; } @@ -1043,6 +1046,7 @@ static int store_object( } if (last && last->data.buf && last->depth < max_depth && dat->len > 20) { + delta_count_attempts_by_type[type]++; delta = diff_delta(last->data.buf, last->data.len, dat->buf, dat->len, &deltalen, dat->len - 20); @@ -1139,17 +1143,11 @@ static int store_object( return 0; } -static void truncate_pack(off_t to, git_SHA_CTX *ctx) +static void truncate_pack(struct sha1file_checkpoint *checkpoint) { - if (ftruncate(pack_data->pack_fd, to) - || lseek(pack_data->pack_fd, to, SEEK_SET) != to) + if (sha1file_truncate(pack_file, checkpoint)) die_errno("cannot truncate pack to skip duplicate"); - pack_size = to; - - /* yes this is a layering violation */ - pack_file->total = to; - pack_file->offset = 0; - pack_file->ctx = *ctx; + pack_size = checkpoint->offset; } static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) @@ -1162,8 +1160,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) unsigned long hdrlen; off_t offset; git_SHA_CTX c; - git_SHA_CTX pack_file_ctx; git_zstream s; + struct sha1file_checkpoint checkpoint; int status = Z_OK; /* Determine if we should auto-checkpoint. */ @@ -1171,11 +1169,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) || (pack_size + 60 + len) < pack_size) cycle_packfile(); - offset = pack_size; - - /* preserve the pack_file SHA1 ctx in case we have to truncate later */ - sha1flush(pack_file); - pack_file_ctx = pack_file->ctx; + sha1file_checkpoint(pack_file, &checkpoint); + offset = checkpoint.offset; hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1; if (out_sz <= hdrlen) @@ -1241,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(offset, &pack_file_ctx); + truncate_pack(&checkpoint); } else if (find_sha1_pack(sha1, packed_git)) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(offset, &pack_file_ctx); + truncate_pack(&checkpoint); } else { e->depth = 0; @@ -1414,8 +1409,9 @@ static void mktree(struct tree_content *t, int v, struct strbuf *b) struct tree_entry *e = t->entries[i]; if (!e->versions[v].mode) continue; - strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode, - e->name->str_dat, '\0'); + strbuf_addf(b, "%o %s%c", + (unsigned int)(e->versions[v].mode & ~NO_DELTA), + e->name->str_dat, '\0'); strbuf_add(b, e->versions[v].sha1, 20); } } @@ -1425,7 +1421,7 @@ static void store_tree(struct tree_entry *root) struct tree_content *t = root->tree; unsigned int i, j, del; struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 }; - struct object_entry *le; + struct object_entry *le = NULL; if (!is_null_sha1(root->versions[1].sha1)) return; @@ -1435,7 +1431,8 @@ static void store_tree(struct tree_entry *root) store_tree(t->entries[i]); } - le = find_object(root->versions[0].sha1); + if (!(root->versions[0].mode & NO_DELTA)) + le = find_object(root->versions[0].sha1); if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) { mktree(t, 0, &old_tree); lo.data = old_tree; @@ -1469,6 +1466,7 @@ static void tree_content_replace( { if (!S_ISDIR(mode)) die("Root cannot be a non-directory"); + hashclr(root->versions[0].sha1); hashcpy(root->versions[1].sha1, sha1); if (root->tree) release_tree_content_recursive(root->tree); @@ -1513,6 +1511,23 @@ static int tree_content_set( if (e->tree) release_tree_content_recursive(e->tree); e->tree = subtree; + + /* + * We need to leave e->versions[0].sha1 alone + * to avoid modifying the preimage tree used + * when writing out the parent directory. + * But after replacing the subdir with a + * completely different one, it's not a good + * delta base any more, and besides, we've + * thrown away the tree entries needed to + * make a delta against it. + * + * So let's just explicitly disable deltas + * for the subtree. + */ + if (S_ISDIR(e->versions[0].mode)) + e->versions[0].mode |= NO_DELTA; + hashclr(root->versions[1].sha1); return 1; } @@ -1967,32 +1982,41 @@ static int validate_raw_date(const char *src, char *result, int maxlen) static char *parse_ident(const char *buf) { - const char *gt; + const char *ltgt; size_t name_len; char *ident; - gt = strrchr(buf, '>'); - if (!gt) + /* ensure there is a space delimiter even if there is no name */ + if (*buf == '<') + --buf; + + ltgt = buf + strcspn(buf, "<>"); + if (*ltgt != '<') + die("Missing < in ident string: %s", buf); + if (ltgt != buf && ltgt[-1] != ' ') + die("Missing space before < in ident string: %s", buf); + ltgt = ltgt + 1 + strcspn(ltgt + 1, "<>"); + if (*ltgt != '>') die("Missing > in ident string: %s", buf); - gt++; - if (*gt != ' ') + ltgt++; + if (*ltgt != ' ') die("Missing space after > in ident string: %s", buf); - gt++; - name_len = gt - buf; + ltgt++; + name_len = ltgt - buf; ident = xmalloc(name_len + 24); strncpy(ident, buf, name_len); switch (whenspec) { case WHENSPEC_RAW: - if (validate_raw_date(gt, ident + name_len, 24) < 0) - die("Invalid raw date \"%s\" in ident: %s", gt, buf); + if (validate_raw_date(ltgt, ident + name_len, 24) < 0) + die("Invalid raw date \"%s\" in ident: %s", ltgt, buf); break; case WHENSPEC_RFC2822: - if (parse_date(gt, ident + name_len, 24) < 0) - die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf); + if (parse_date(ltgt, ident + name_len, 24) < 0) + die("Invalid rfc2822 date \"%s\" in ident: %s", ltgt, buf); break; case WHENSPEC_NOW: - if (strcmp("now", gt)) + if (strcmp("now", ltgt)) die("Date in ident must be 'now': %s", buf); datestamp(ident + name_len, 24); break; @@ -2140,6 +2164,11 @@ static uintmax_t do_change_note_fanout( if (tmp_hex_sha1_len == 40 && !get_sha1_hex(hex_sha1, sha1)) { /* This is a note entry */ + if (fanout == 0xff) { + /* Counting mode, no rename */ + num_notes++; + continue; + } construct_path_with_fanout(hex_sha1, fanout, realpath); if (!strcmp(fullpath, realpath)) { /* Note entry is in correct location */ @@ -2346,7 +2375,7 @@ static void file_change_cr(struct branch *b, int rename) leaf.tree); } -static void note_change_n(struct branch *b, unsigned char old_fanout) +static void note_change_n(struct branch *b, unsigned char *old_fanout) { const char *p = command_buf.buf + 2; static struct strbuf uq = STRBUF_INIT; @@ -2357,6 +2386,23 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) uint16_t inline_data = 0; unsigned char new_fanout; + /* + * When loading a branch, we don't traverse its tree to count the real + * number of notes (too expensive to do this for all non-note refs). + * This means that recently loaded notes refs might incorrectly have + * b->num_notes == 0, and consequently, old_fanout might be wrong. + * + * Fix this by traversing the tree and counting the number of notes + * when b->num_notes == 0. If the notes tree is truly empty, the + * calculation should not take long. + */ + if (b->num_notes == 0 && *old_fanout == 0) { + /* Invoke change_note_fanout() in "counting mode". */ + b->num_notes = change_note_fanout(&b->branch_tree, 0xff); + *old_fanout = convert_num_notes_to_fanout(b->num_notes); + } + + /* Now parse the notemodify command. */ /* <dataref> or 'inline' */ if (*p == ':') { char *x; @@ -2378,6 +2424,8 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) /* <committish> */ s = lookup_branch(p); if (s) { + if (is_null_sha1(s->sha1)) + die("Can't add a note on empty branch."); hashcpy(commit_sha1, s->sha1); } else if (*p == ':') { uintmax_t commit_mark = strtoumax(p + 1, NULL, 10); @@ -2415,7 +2463,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) typename(type), command_buf.buf); } - construct_path_with_fanout(sha1_to_hex(commit_sha1), old_fanout, path); + construct_path_with_fanout(sha1_to_hex(commit_sha1), *old_fanout, path); if (tree_content_remove(&b->branch_tree, path, NULL)) b->num_notes--; @@ -2602,7 +2650,7 @@ static void parse_new_commit(void) else if (!prefixcmp(command_buf.buf, "C ")) file_change_cr(b, 0); else if (!prefixcmp(command_buf.buf, "N ")) - note_change_n(b, prev_fanout); + note_change_n(b, &prev_fanout); else if (!strcmp("deleteall", command_buf.buf)) file_change_deleteall(b); else if (!prefixcmp(command_buf.buf, "ls ")) @@ -2679,6 +2727,8 @@ static void parse_new_tag(void) from = strchr(command_buf.buf, ' ') + 1; s = lookup_branch(from); if (s) { + if (is_null_sha1(s->sha1)) + die("Can't tag an empty branch."); hashcpy(sha1, s->sha1); type = OBJ_COMMIT; } else if (*from == ':') { @@ -2688,13 +2738,13 @@ static void parse_new_tag(void) type = oe->type; hashcpy(sha1, oe->idx.sha1); } else if (!get_sha1(from, sha1)) { - unsigned long size; - char *buf; - - buf = read_sha1_file(sha1, &type, &size); - if (!buf || size < 46) - die("Not a valid commit: %s", from); - free(buf); + struct object_entry *oe = find_object(sha1); + if (!oe) { + type = sha1_object_info(sha1, NULL); + if (type < 0) + die("Not a valid object: %s", from); + } else + type = oe->type; } else die("Invalid ref name or SHA1 expression: %s", from); read_next_command(); @@ -2798,7 +2848,12 @@ static void cat_blob(struct object_entry *oe, unsigned char sha1[20]) strbuf_release(&line); cat_blob_write(buf, size); cat_blob_write("\n", 1); - free(buf); + if (oe && oe->pack_id == pack_id) { + last_blob.offset = oe->idx.offset; + strbuf_attach(&last_blob.data, buf, size, size); + last_blob.depth = oe->depth; + } else + free(buf); } static void parse_cat_blob(void) @@ -2927,7 +2982,7 @@ static void print_ls(int mode, const unsigned char *sha1, const char *path) /* mode SP type SP object_name TAB path LF */ strbuf_reset(&line); strbuf_addf(&line, "%06o %s %s\t", - mode, type, sha1_to_hex(sha1)); + mode & ~NO_DELTA, type, sha1_to_hex(sha1)); quote_c_style(path, &line, NULL, 0); strbuf_addch(&line, '\n'); } @@ -3139,6 +3194,8 @@ static int parse_one_feature(const char *feature, int from_stream) relative_marks_paths = 1; } else if (!strcmp(feature, "no-relative-marks")) { relative_marks_paths = 0; + } else if (!strcmp(feature, "done")) { + require_explicit_termination = 1; } else if (!strcmp(feature, "force")) { force_update = 1; } else if (!strcmp(feature, "notes") || !strcmp(feature, "ls")) { @@ -3195,10 +3252,10 @@ static int git_pack_config(const char *k, const char *v, void *cb) return 0; } if (!strcmp(k, "pack.indexversion")) { - pack_idx_default_version = git_config_int(k, v); - if (pack_idx_default_version > 2) + pack_idx_opts.version = git_config_int(k, v); + if (pack_idx_opts.version > 2) die("bad pack.indexversion=%"PRIu32, - pack_idx_default_version); + pack_idx_opts.version); return 0; } if (!strcmp(k, "pack.packsizelimit")) { @@ -3252,6 +3309,7 @@ int main(int argc, const char **argv) usage(fast_import_usage); setup_git_directory(); + reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); if (!pack_compression_seen && core_compression_seen) pack_compression_level = core_compression_level; @@ -3288,6 +3346,8 @@ int main(int argc, const char **argv) parse_reset_branch(); else if (!strcmp("checkpoint", command_buf.buf)) parse_checkpoint(); + else if (!strcmp("done", command_buf.buf)) + break; else if (!prefixcmp(command_buf.buf, "progress ")) parse_progress(); else if (!prefixcmp(command_buf.buf, "feature ")) @@ -3307,6 +3367,9 @@ int main(int argc, const char **argv) if (!seen_data_command) parse_argv(); + if (require_explicit_termination && feof(stdin)) + die("stream ends early"); + end_packfile(); dump_branches(); @@ -3328,10 +3391,10 @@ int main(int argc, const char **argv) fprintf(stderr, "---------------------------------------------------------------------\n"); fprintf(stderr, "Alloc'd objects: %10" PRIuMAX "\n", alloc_count); fprintf(stderr, "Total objects: %10" PRIuMAX " (%10" PRIuMAX " duplicates )\n", total_count, duplicate_count); - fprintf(stderr, " blobs : %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]); - fprintf(stderr, " trees : %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]); - fprintf(stderr, " commits: %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]); - fprintf(stderr, " tags : %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]); + fprintf(stderr, " blobs : %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB], delta_count_attempts_by_type[OBJ_BLOB]); + fprintf(stderr, " trees : %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE], delta_count_attempts_by_type[OBJ_TREE]); + fprintf(stderr, " commits: %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT], delta_count_attempts_by_type[OBJ_COMMIT]); + fprintf(stderr, " tags : %10" PRIuMAX " (%10" PRIuMAX " duplicates %10" PRIuMAX " deltas of %10" PRIuMAX" attempts)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG], delta_count_attempts_by_type[OBJ_TAG]); fprintf(stderr, "Total branches: %10lu (%10lu loads )\n", branch_count, branch_load_count); fprintf(stderr, " marks: %10" PRIuMAX " (%10" PRIuMAX " unique )\n", (((uintmax_t)1) << marks->shift) * 1024, marks_set_count); fprintf(stderr, " atoms: %10u\n", atom_cnt); |