diff options
author | Jeff King <peff@peff.net> | 2020-08-13 10:59:45 -0400 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2020-08-13 11:02:13 -0700 |
commit | a006f875e2689cb7df543d5950beadb0416d305b (patch) | |
tree | 575fcc5519ccede961c7065ee193ed9989332ce0 /fast-import.c | |
parent | make git-bugreport a builtin (diff) | |
download | tgif-a006f875e2689cb7df543d5950beadb0416d305b.tar.xz |
make git-fast-import a builtin
There's no reason that git-fast-import benefits from being a separate
binary. And as it links against libgit.a, it has a non-trivial disk
footprint. Let's make it a builtin, which reduces the size of a stripped
installation from 22MB to 21MB.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'fast-import.c')
-rw-r--r-- | fast-import.c | 3649 |
1 files changed, 0 insertions, 3649 deletions
diff --git a/fast-import.c b/fast-import.c deleted file mode 100644 index ce47794db6..0000000000 --- a/fast-import.c +++ /dev/null @@ -1,3649 +0,0 @@ -#include "builtin.h" -#include "cache.h" -#include "repository.h" -#include "config.h" -#include "lockfile.h" -#include "object.h" -#include "blob.h" -#include "tree.h" -#include "commit.h" -#include "delta.h" -#include "pack.h" -#include "refs.h" -#include "csum-file.h" -#include "quote.h" -#include "dir.h" -#include "run-command.h" -#include "packfile.h" -#include "object-store.h" -#include "mem-pool.h" -#include "commit-reach.h" -#include "khash.h" - -#define PACK_ID_BITS 16 -#define MAX_PACK_ID ((1<<PACK_ID_BITS)-1) -#define DEPTH_BITS 13 -#define MAX_DEPTH ((1<<DEPTH_BITS)-1) - -/* - * We abuse the setuid bit on directories to mean "do not delta". - */ -#define NO_DELTA S_ISUID - -/* - * The amount of additional space required in order to write an object into the - * current pack. This is the hash lengths at the end of the pack, plus the - * length of one object ID. - */ -#define PACK_SIZE_THRESHOLD (the_hash_algo->rawsz * 3) - -struct object_entry { - struct pack_idx_entry idx; - struct hashmap_entry ent; - uint32_t type : TYPE_BITS, - pack_id : PACK_ID_BITS, - depth : DEPTH_BITS; -}; - -static int object_entry_hashcmp(const void *map_data, - const struct hashmap_entry *eptr, - const struct hashmap_entry *entry_or_key, - const void *keydata) -{ - const struct object_id *oid = keydata; - const struct object_entry *e1, *e2; - - e1 = container_of(eptr, const struct object_entry, ent); - if (oid) - return oidcmp(&e1->idx.oid, oid); - - e2 = container_of(entry_or_key, const struct object_entry, ent); - return oidcmp(&e1->idx.oid, &e2->idx.oid); -} - -struct object_entry_pool { - struct object_entry_pool *next_pool; - struct object_entry *next_free; - struct object_entry *end; - struct object_entry entries[FLEX_ARRAY]; /* more */ -}; - -struct mark_set { - union { - struct object_id *oids[1024]; - struct object_entry *marked[1024]; - struct mark_set *sets[1024]; - } data; - unsigned int shift; -}; - -struct last_object { - struct strbuf data; - off_t offset; - unsigned int depth; - unsigned no_swap : 1; -}; - -struct atom_str { - struct atom_str *next_atom; - unsigned short str_len; - char str_dat[FLEX_ARRAY]; /* more */ -}; - -struct tree_content; -struct tree_entry { - struct tree_content *tree; - struct atom_str *name; - struct tree_entry_ms { - uint16_t mode; - struct object_id oid; - } versions[2]; -}; - -struct tree_content { - unsigned int entry_capacity; /* must match avail_tree_content */ - unsigned int entry_count; - unsigned int delta_depth; - struct tree_entry *entries[FLEX_ARRAY]; /* more */ -}; - -struct avail_tree_content { - unsigned int entry_capacity; /* must match tree_content */ - struct avail_tree_content *next_avail; -}; - -struct branch { - struct branch *table_next_branch; - struct branch *active_next_branch; - const char *name; - struct tree_entry branch_tree; - uintmax_t last_commit; - uintmax_t num_notes; - unsigned active : 1; - unsigned delete : 1; - unsigned pack_id : PACK_ID_BITS; - struct object_id oid; -}; - -struct tag { - struct tag *next_tag; - const char *name; - unsigned int pack_id; - struct object_id oid; -}; - -struct hash_list { - struct hash_list *next; - struct object_id oid; -}; - -typedef enum { - WHENSPEC_RAW = 1, - WHENSPEC_RAW_PERMISSIVE, - WHENSPEC_RFC2822, - WHENSPEC_NOW -} whenspec_type; - -struct recent_command { - struct recent_command *prev; - struct recent_command *next; - char *buf; -}; - -typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark); -typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp); - -/* Configured limits on output */ -static unsigned long max_depth = 50; -static off_t max_packsize; -static int unpack_limit = 100; -static int force_update; - -/* Stats and misc. counters */ -static uintmax_t alloc_count; -static uintmax_t marks_set_count; -static uintmax_t object_count_by_type[1 << TYPE_BITS]; -static uintmax_t duplicate_count_by_type[1 << TYPE_BITS]; -static uintmax_t delta_count_by_type[1 << TYPE_BITS]; -static uintmax_t delta_count_attempts_by_type[1 << TYPE_BITS]; -static unsigned long object_count; -static unsigned long branch_count; -static unsigned long branch_load_count; -static int failure; -static FILE *pack_edges; -static unsigned int show_stats = 1; -static int global_argc; -static const char **global_argv; - -/* Memory pools */ -static struct mem_pool fi_mem_pool = {NULL, 2*1024*1024 - - sizeof(struct mp_block), 0 }; - -/* Atom management */ -static unsigned int atom_table_sz = 4451; -static unsigned int atom_cnt; -static struct atom_str **atom_table; - -/* The .pack file being generated */ -static struct pack_idx_option pack_idx_opts; -static unsigned int pack_id; -static struct hashfile *pack_file; -static struct packed_git *pack_data; -static struct packed_git **all_packs; -static off_t pack_size; - -/* Table of objects we've written. */ -static unsigned int object_entry_alloc = 5000; -static struct object_entry_pool *blocks; -static struct hashmap object_table; -static struct mark_set *marks; -static const char *export_marks_file; -static const char *import_marks_file; -static int import_marks_file_from_stream; -static int import_marks_file_ignore_missing; -static int import_marks_file_done; -static int relative_marks_paths; - -/* Our last blob */ -static struct last_object last_blob = { STRBUF_INIT, 0, 0, 0 }; - -/* Tree management */ -static unsigned int tree_entry_alloc = 1000; -static void *avail_tree_entry; -static unsigned int avail_tree_table_sz = 100; -static struct avail_tree_content **avail_tree_table; -static size_t tree_entry_allocd; -static struct strbuf old_tree = STRBUF_INIT; -static struct strbuf new_tree = STRBUF_INIT; - -/* Branch data */ -static unsigned long max_active_branches = 5; -static unsigned long cur_active_branches; -static unsigned long branch_table_sz = 1039; -static struct branch **branch_table; -static struct branch *active_branches; - -/* Tag data */ -static struct tag *first_tag; -static struct tag *last_tag; - -/* Input stream parsing */ -static whenspec_type whenspec = WHENSPEC_RAW; -static struct strbuf command_buf = STRBUF_INIT; -static int unread_command_buf; -static struct recent_command cmd_hist = {&cmd_hist, &cmd_hist, NULL}; -static struct recent_command *cmd_tail = &cmd_hist; -static struct recent_command *rc_free; -static unsigned int cmd_save = 100; -static uintmax_t next_mark; -static struct strbuf new_data = STRBUF_INIT; -static int seen_data_command; -static int require_explicit_termination; -static int allow_unsafe_features; - -/* Signal handling */ -static volatile sig_atomic_t checkpoint_requested; - -/* Submodule marks */ -static struct string_list sub_marks_from = STRING_LIST_INIT_DUP; -static struct string_list sub_marks_to = STRING_LIST_INIT_DUP; -static kh_oid_map_t *sub_oid_map; - -/* Where to write output of cat-blob commands */ -static int cat_blob_fd = STDOUT_FILENO; - -static void parse_argv(void); -static void parse_get_mark(const char *p); -static void parse_cat_blob(const char *p); -static void parse_ls(const char *p, struct branch *b); - -static void for_each_mark(struct mark_set *m, uintmax_t base, each_mark_fn_t callback, void *p) -{ - uintmax_t k; - if (m->shift) { - for (k = 0; k < 1024; k++) { - if (m->data.sets[k]) - for_each_mark(m->data.sets[k], base + (k << m->shift), callback, p); - } - } else { - for (k = 0; k < 1024; k++) { - if (m->data.marked[k]) - callback(base + k, m->data.marked[k], p); - } - } -} - -static void dump_marks_fn(uintmax_t mark, void *object, void *cbp) { - struct object_entry *e = object; - FILE *f = cbp; - - fprintf(f, ":%" PRIuMAX " %s\n", mark, oid_to_hex(&e->idx.oid)); -} - -static void write_branch_report(FILE *rpt, struct branch *b) -{ - fprintf(rpt, "%s:\n", b->name); - - fprintf(rpt, " status :"); - if (b->active) - fputs(" active", rpt); - if (b->branch_tree.tree) - fputs(" loaded", rpt); - if (is_null_oid(&b->branch_tree.versions[1].oid)) - fputs(" dirty", rpt); - fputc('\n', rpt); - - fprintf(rpt, " tip commit : %s\n", oid_to_hex(&b->oid)); - fprintf(rpt, " old tree : %s\n", - oid_to_hex(&b->branch_tree.versions[0].oid)); - fprintf(rpt, " cur tree : %s\n", - oid_to_hex(&b->branch_tree.versions[1].oid)); - fprintf(rpt, " commit clock: %" PRIuMAX "\n", b->last_commit); - - fputs(" last pack : ", rpt); - if (b->pack_id < MAX_PACK_ID) - fprintf(rpt, "%u", b->pack_id); - fputc('\n', rpt); - - fputc('\n', rpt); -} - -static void write_crash_report(const char *err) -{ - char *loc = git_pathdup("fast_import_crash_%"PRIuMAX, (uintmax_t) getpid()); - FILE *rpt = fopen(loc, "w"); - struct branch *b; - unsigned long lu; - struct recent_command *rc; - - if (!rpt) { - error_errno("can't write crash report %s", loc); - free(loc); - return; - } - - fprintf(stderr, "fast-import: dumping crash report to %s\n", loc); - - fprintf(rpt, "fast-import crash report:\n"); - fprintf(rpt, " fast-import process: %"PRIuMAX"\n", (uintmax_t) getpid()); - fprintf(rpt, " parent process : %"PRIuMAX"\n", (uintmax_t) getppid()); - fprintf(rpt, " at %s\n", show_date(time(NULL), 0, DATE_MODE(ISO8601))); - fputc('\n', rpt); - - fputs("fatal: ", rpt); - fputs(err, rpt); - fputc('\n', rpt); - - fputc('\n', rpt); - fputs("Most Recent Commands Before Crash\n", rpt); - fputs("---------------------------------\n", rpt); - for (rc = cmd_hist.next; rc != &cmd_hist; rc = rc->next) { - if (rc->next == &cmd_hist) - fputs("* ", rpt); - else - fputs(" ", rpt); - fputs(rc->buf, rpt); - fputc('\n', rpt); - } - - fputc('\n', rpt); - fputs("Active Branch LRU\n", rpt); - fputs("-----------------\n", rpt); - fprintf(rpt, " active_branches = %lu cur, %lu max\n", - cur_active_branches, - max_active_branches); - fputc('\n', rpt); - fputs(" pos clock name\n", rpt); - fputs(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", rpt); - for (b = active_branches, lu = 0; b; b = b->active_next_branch) - fprintf(rpt, " %2lu) %6" PRIuMAX" %s\n", - ++lu, b->last_commit, b->name); - - fputc('\n', rpt); - fputs("Inactive Branches\n", rpt); - fputs("-----------------\n", rpt); - for (lu = 0; lu < branch_table_sz; lu++) { - for (b = branch_table[lu]; b; b = b->table_next_branch) - write_branch_report(rpt, b); - } - - if (first_tag) { - struct tag *tg; - fputc('\n', rpt); - fputs("Annotated Tags\n", rpt); - fputs("--------------\n", rpt); - for (tg = first_tag; tg; tg = tg->next_tag) { - fputs(oid_to_hex(&tg->oid), rpt); - fputc(' ', rpt); - fputs(tg->name, rpt); - fputc('\n', rpt); - } - } - - fputc('\n', rpt); - fputs("Marks\n", rpt); - fputs("-----\n", rpt); - if (export_marks_file) - fprintf(rpt, " exported to %s\n", export_marks_file); - else - for_each_mark(marks, 0, dump_marks_fn, rpt); - - fputc('\n', rpt); - fputs("-------------------\n", rpt); - fputs("END OF CRASH REPORT\n", rpt); - fclose(rpt); - free(loc); -} - -static void end_packfile(void); -static void unkeep_all_packs(void); -static void dump_marks(void); - -static NORETURN void die_nicely(const char *err, va_list params) -{ - static int zombie; - char message[2 * PATH_MAX]; - - vsnprintf(message, sizeof(message), err, params); - fputs("fatal: ", stderr); - fputs(message, stderr); - fputc('\n', stderr); - - if (!zombie) { - zombie = 1; - write_crash_report(message); - end_packfile(); - unkeep_all_packs(); - dump_marks(); - } - exit(128); -} - -#ifndef SIGUSR1 /* Windows, for example */ - -static void set_checkpoint_signal(void) -{ -} - -#else - -static void checkpoint_signal(int signo) -{ - checkpoint_requested = 1; -} - -static void set_checkpoint_signal(void) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = checkpoint_signal; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_RESTART; - sigaction(SIGUSR1, &sa, NULL); -} - -#endif - -static void alloc_objects(unsigned int cnt) -{ - struct object_entry_pool *b; - - b = xmalloc(sizeof(struct object_entry_pool) - + cnt * sizeof(struct object_entry)); - b->next_pool = blocks; - b->next_free = b->entries; - b->end = b->entries + cnt; - blocks = b; - alloc_count += cnt; -} - -static struct object_entry *new_object(struct object_id *oid) -{ - struct object_entry *e; - - if (blocks->next_free == blocks->end) - alloc_objects(object_entry_alloc); - - e = blocks->next_free++; - oidcpy(&e->idx.oid, oid); - return e; -} - -static struct object_entry *find_object(struct object_id *oid) -{ - return hashmap_get_entry_from_hash(&object_table, oidhash(oid), oid, - struct object_entry, ent); -} - -static struct object_entry *insert_object(struct object_id *oid) -{ - struct object_entry *e; - unsigned int hash = oidhash(oid); - - e = hashmap_get_entry_from_hash(&object_table, hash, oid, - struct object_entry, ent); - if (!e) { - e = new_object(oid); - e->idx.offset = 0; - hashmap_entry_init(&e->ent, hash); - hashmap_add(&object_table, &e->ent); - } - - return e; -} - -static void invalidate_pack_id(unsigned int id) -{ - unsigned long lu; - struct tag *t; - struct hashmap_iter iter; - struct object_entry *e; - - hashmap_for_each_entry(&object_table, &iter, e, ent) { - if (e->pack_id == id) - e->pack_id = MAX_PACK_ID; - } - - for (lu = 0; lu < branch_table_sz; lu++) { - struct branch *b; - - for (b = branch_table[lu]; b; b = b->table_next_branch) - if (b->pack_id == id) - b->pack_id = MAX_PACK_ID; - } - - for (t = first_tag; t; t = t->next_tag) - if (t->pack_id == id) - t->pack_id = MAX_PACK_ID; -} - -static unsigned int hc_str(const char *s, size_t len) -{ - unsigned int r = 0; - while (len-- > 0) - r = r * 31 + *s++; - return r; -} - -static char *pool_strdup(const char *s) -{ - size_t len = strlen(s) + 1; - char *r = mem_pool_alloc(&fi_mem_pool, len); - memcpy(r, s, len); - return r; -} - -static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe) -{ - while ((idnum >> s->shift) >= 1024) { - s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); - s->shift = marks->shift + 10; - s->data.sets[0] = marks; - marks = s; - } - while (s->shift) { - uintmax_t i = idnum >> s->shift; - idnum -= i << s->shift; - if (!s->data.sets[i]) { - s->data.sets[i] = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); - s->data.sets[i]->shift = s->shift - 10; - } - s = s->data.sets[i]; - } - if (!s->data.marked[idnum]) - marks_set_count++; - s->data.marked[idnum] = oe; -} - -static void *find_mark(struct mark_set *s, uintmax_t idnum) -{ - uintmax_t orig_idnum = idnum; - struct object_entry *oe = NULL; - if ((idnum >> s->shift) < 1024) { - while (s && s->shift) { - uintmax_t i = idnum >> s->shift; - idnum -= i << s->shift; - s = s->data.sets[i]; - } - if (s) - oe = s->data.marked[idnum]; - } - if (!oe) - die("mark :%" PRIuMAX " not declared", orig_idnum); - return oe; -} - -static struct atom_str *to_atom(const char *s, unsigned short len) -{ - unsigned int hc = hc_str(s, len) % atom_table_sz; - struct atom_str *c; - - for (c = atom_table[hc]; c; c = c->next_atom) - if (c->str_len == len && !strncmp(s, c->str_dat, len)) - return c; - - c = mem_pool_alloc(&fi_mem_pool, sizeof(struct atom_str) + len + 1); - c->str_len = len; - memcpy(c->str_dat, s, len); - c->str_dat[len] = 0; - c->next_atom = atom_table[hc]; - atom_table[hc] = c; - atom_cnt++; - return c; -} - -static struct branch *lookup_branch(const char *name) -{ - unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz; - struct branch *b; - - for (b = branch_table[hc]; b; b = b->table_next_branch) - if (!strcmp(name, b->name)) - return b; - return NULL; -} - -static struct branch *new_branch(const char *name) -{ - unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz; - struct branch *b = lookup_branch(name); - - if (b) - die("Invalid attempt to create duplicate branch: %s", name); - if (check_refname_format(name, REFNAME_ALLOW_ONELEVEL)) - die("Branch name doesn't conform to GIT standards: %s", name); - - b = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct branch)); - b->name = pool_strdup(name); - b->table_next_branch = branch_table[hc]; - b->branch_tree.versions[0].mode = S_IFDIR; - b->branch_tree.versions[1].mode = S_IFDIR; - b->num_notes = 0; - b->active = 0; - b->pack_id = MAX_PACK_ID; - branch_table[hc] = b; - branch_count++; - return b; -} - -static unsigned int hc_entries(unsigned int cnt) -{ - cnt = cnt & 7 ? (cnt / 8) + 1 : cnt / 8; - return cnt < avail_tree_table_sz ? cnt : avail_tree_table_sz - 1; -} - -static struct tree_content *new_tree_content(unsigned int cnt) -{ - struct avail_tree_content *f, *l = NULL; - struct tree_content *t; - unsigned int hc = hc_entries(cnt); - - for (f = avail_tree_table[hc]; f; l = f, f = f->next_avail) - if (f->entry_capacity >= cnt) - break; - - if (f) { - if (l) - l->next_avail = f->next_avail; - else - avail_tree_table[hc] = f->next_avail; - } else { - cnt = cnt & 7 ? ((cnt / 8) + 1) * 8 : cnt; - f = mem_pool_alloc(&fi_mem_pool, sizeof(*t) + sizeof(t->entries[0]) * cnt); - f->entry_capacity = cnt; - } - - t = (struct tree_content*)f; - t->entry_count = 0; - t->delta_depth = 0; - return t; -} - -static void release_tree_entry(struct tree_entry *e); -static void release_tree_content(struct tree_content *t) -{ - struct avail_tree_content *f = (struct avail_tree_content*)t; - unsigned int hc = hc_entries(f->entry_capacity); - f->next_avail = avail_tree_table[hc]; - avail_tree_table[hc] = f; -} - -static void release_tree_content_recursive(struct tree_content *t) -{ - unsigned int i; - for (i = 0; i < t->entry_count; i++) - release_tree_entry(t->entries[i]); - release_tree_content(t); -} - -static struct tree_content *grow_tree_content( - struct tree_content *t, - int amt) -{ - struct tree_content *r = new_tree_content(t->entry_count + amt); - r->entry_count = t->entry_count; - r->delta_depth = t->delta_depth; - COPY_ARRAY(r->entries, t->entries, t->entry_count); - release_tree_content(t); - return r; -} - -static struct tree_entry *new_tree_entry(void) -{ - struct tree_entry *e; - - if (!avail_tree_entry) { - unsigned int n = tree_entry_alloc; - tree_entry_allocd += n * sizeof(struct tree_entry); - ALLOC_ARRAY(e, n); - avail_tree_entry = e; - while (n-- > 1) { - *((void**)e) = e + 1; - e++; - } - *((void**)e) = NULL; - } - - e = avail_tree_entry; - avail_tree_entry = *((void**)e); - return e; -} - -static void release_tree_entry(struct tree_entry *e) -{ - if (e->tree) - release_tree_content_recursive(e->tree); - *((void**)e) = avail_tree_entry; - avail_tree_entry = e; -} - -static struct tree_content *dup_tree_content(struct tree_content *s) -{ - struct tree_content *d; - struct tree_entry *a, *b; - unsigned int i; - - if (!s) - return NULL; - d = new_tree_content(s->entry_count); - for (i = 0; i < s->entry_count; i++) { - a = s->entries[i]; - b = new_tree_entry(); - memcpy(b, a, sizeof(*a)); - if (a->tree && is_null_oid(&b->versions[1].oid)) - b->tree = dup_tree_content(a->tree); - else - b->tree = NULL; - d->entries[i] = b; - } - d->entry_count = s->entry_count; - d->delta_depth = s->delta_depth; - - return d; -} - -static void start_packfile(void) -{ - struct strbuf tmp_file = STRBUF_INIT; - struct packed_git *p; - struct pack_header hdr; - int pack_fd; - - pack_fd = odb_mkstemp(&tmp_file, "pack/tmp_pack_XXXXXX"); - FLEX_ALLOC_STR(p, pack_name, tmp_file.buf); - strbuf_release(&tmp_file); - - p->pack_fd = pack_fd; - p->do_not_close = 1; - pack_file = hashfd(pack_fd, p->pack_name); - - hdr.hdr_signature = htonl(PACK_SIGNATURE); - hdr.hdr_version = htonl(2); - hdr.hdr_entries = 0; - hashwrite(pack_file, &hdr, sizeof(hdr)); - - pack_data = p; - pack_size = sizeof(hdr); - object_count = 0; - - REALLOC_ARRAY(all_packs, pack_id + 1); - all_packs[pack_id] = p; -} - -static const char *create_index(void) -{ - const char *tmpfile; - struct pack_idx_entry **idx, **c, **last; - struct object_entry *e; - struct object_entry_pool *o; - - /* Build the table of object IDs. */ - ALLOC_ARRAY(idx, object_count); - c = idx; - for (o = blocks; o; o = o->next_pool) - for (e = o->next_free; e-- != o->entries;) - if (pack_id == e->pack_id) - *c++ = &e->idx; - last = idx + object_count; - if (c != last) - die("internal consistency error creating the index"); - - tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, - pack_data->hash); - free(idx); - return tmpfile; -} - -static char *keep_pack(const char *curr_index_name) -{ - static const char *keep_msg = "fast-import"; - struct strbuf name = STRBUF_INIT; - int keep_fd; - - odb_pack_name(&name, pack_data->hash, "keep"); - keep_fd = odb_pack_keep(name.buf); - if (keep_fd < 0) - die_errno("cannot create keep file"); - write_or_die(keep_fd, keep_msg, strlen(keep_msg)); - if (close(keep_fd)) - die_errno("failed to write keep file"); - - odb_pack_name(&name, pack_data->hash, "pack"); - if (finalize_object_file(pack_data->pack_name, name.buf)) - die("cannot store pack file"); - - odb_pack_name(&name, pack_data->hash, "idx"); - if (finalize_object_file(curr_index_name, name.buf)) - die("cannot store index file"); - free((void *)curr_index_name); - return strbuf_detach(&name, NULL); -} - -static void unkeep_all_packs(void) -{ - struct strbuf name = STRBUF_INIT; - int k; - - for (k = 0; k < pack_id; k++) { - struct packed_git *p = all_packs[k]; - odb_pack_name(&name, p->hash, "keep"); - unlink_or_warn(name.buf); - } - strbuf_release(&name); -} - -static int loosen_small_pack(const struct packed_git *p) -{ - struct child_process unpack = CHILD_PROCESS_INIT; - - if (lseek(p->pack_fd, 0, SEEK_SET) < 0) - die_errno("Failed seeking to start of '%s'", p->pack_name); - - unpack.in = p->pack_fd; - unpack.git_cmd = 1; - unpack.stdout_to_stderr = 1; - strvec_push(&unpack.args, "unpack-objects"); - if (!show_stats) - strvec_push(&unpack.args, "-q"); - - return run_command(&unpack); -} - -static void end_packfile(void) -{ - static int running; - - if (running || !pack_data) - return; - - running = 1; - clear_delta_base_cache(); - if (object_count) { - struct packed_git *new_p; - struct object_id cur_pack_oid; - char *idx_name; - int i; - struct branch *b; - struct tag *t; - - close_pack_windows(pack_data); - finalize_hashfile(pack_file, cur_pack_oid.hash, 0); - fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash, - pack_data->pack_name, object_count, - cur_pack_oid.hash, pack_size); - - if (object_count <= unpack_limit) { - if (!loosen_small_pack(pack_data)) { - invalidate_pack_id(pack_id); - goto discard_pack; - } - } - - close(pack_data->pack_fd); - idx_name = keep_pack(create_index()); - - /* Register the packfile with core git's machinery. */ - new_p = add_packed_git(idx_name, strlen(idx_name), 1); - if (!new_p) - die("core git rejected index %s", idx_name); - all_packs[pack_id] = new_p; - install_packed_git(the_repository, new_p); - free(idx_name); - - /* Print the boundary */ - if (pack_edges) { - fprintf(pack_edges, "%s:", new_p->pack_name); - for (i = 0; i < branch_table_sz; i++) { - for (b = branch_table[i]; b; b = b->table_next_branch) { - if (b->pack_id == pack_id) - fprintf(pack_edges, " %s", - oid_to_hex(&b->oid)); - } - } - for (t = first_tag; t; t = t->next_tag) { - if (t->pack_id == pack_id) - fprintf(pack_edges, " %s", - oid_to_hex(&t->oid)); - } - fputc('\n', pack_edges); - fflush(pack_edges); - } - - pack_id++; - } - else { -discard_pack: - close(pack_data->pack_fd); - unlink_or_warn(pack_data->pack_name); - } - FREE_AND_NULL(pack_data); - running = 0; - - /* We can't carry a delta across packfiles. */ - strbuf_release(&last_blob.data); - last_blob.offset = 0; - last_blob.depth = 0; -} - -static void cycle_packfile(void) -{ - end_packfile(); - start_packfile(); -} - -static int store_object( - enum object_type type, - struct strbuf *dat, - struct last_object *last, - struct object_id *oidout, - uintmax_t mark) -{ - void *out, *delta; - struct object_entry *e; - unsigned char hdr[96]; - struct object_id oid; - unsigned long hdrlen, deltalen; - git_hash_ctx c; - git_zstream s; - - hdrlen = xsnprintf((char *)hdr, sizeof(hdr), "%s %lu", - type_name(type), (unsigned long)dat->len) + 1; - the_hash_algo->init_fn(&c); - the_hash_algo->update_fn(&c, hdr, hdrlen); - the_hash_algo->update_fn(&c, dat->buf, dat->len); - the_hash_algo->final_fn(oid.hash, &c); - if (oidout) - oidcpy(oidout, &oid); - - e = insert_object(&oid); - if (mark) - insert_mark(marks, mark, e); - if (e->idx.offset) { - duplicate_count_by_type[type]++; - return 1; - } else if (find_sha1_pack(oid.hash, - get_all_packs(the_repository))) { - e->type = type; - e->pack_id = MAX_PACK_ID; - e->idx.offset = 1; /* just not zero! */ - duplicate_count_by_type[type]++; - return 1; - } - - if (last && last->data.len && last->data.buf && last->depth < max_depth - && dat->len > the_hash_algo->rawsz) { - - delta_count_attempts_by_type[type]++; - delta = diff_delta(last->data.buf, last->data.len, - dat->buf, dat->len, - &deltalen, dat->len - the_hash_algo->rawsz); - } else - delta = NULL; - - git_deflate_init(&s, pack_compression_level); - if (delta) { - s.next_in = delta; - s.avail_in = deltalen; - } else { - s.next_in = (void *)dat->buf; - s.avail_in = dat->len; - } - s.avail_out = git_deflate_bound(&s, s.avail_in); - s.next_out = out = xmalloc(s.avail_out); - while (git_deflate(&s, Z_FINISH) == Z_OK) - ; /* nothing */ - git_deflate_end(&s); - - /* Determine if we should auto-checkpoint. */ - if ((max_packsize - && (pack_size + PACK_SIZE_THRESHOLD + s.total_out) > max_packsize) - || (pack_size + PACK_SIZE_THRESHOLD + s.total_out) < pack_size) { - - /* This new object needs to *not* have the current pack_id. */ - e->pack_id = pack_id + 1; - cycle_packfile(); - - /* We cannot carry a delta into the new pack. */ - if (delta) { - FREE_AND_NULL(delta); - - git_deflate_init(&s, pack_compression_level); - s.next_in = (void *)dat->buf; - s.avail_in = dat->len; - s.avail_out = git_deflate_bound(&s, s.avail_in); - s.next_out = out = xrealloc(out, s.avail_out); - while (git_deflate(&s, Z_FINISH) == Z_OK) - ; /* nothing */ - git_deflate_end(&s); - } - } - - e->type = type; - e->pack_id = pack_id; - e->idx.offset = pack_size; - object_count++; - object_count_by_type[type]++; - - crc32_begin(pack_file); - - if (delta) { - off_t ofs = e->idx.offset - last->offset; - unsigned pos = sizeof(hdr) - 1; - - delta_count_by_type[type]++; - e->depth = last->depth + 1; - - hdrlen = encode_in_pack_object_header(hdr, sizeof(hdr), - OBJ_OFS_DELTA, deltalen); - hashwrite(pack_file, hdr, hdrlen); - pack_size += hdrlen; - - hdr[pos] = ofs & 127; - while (ofs >>= 7) - hdr[--pos] = 128 | (--ofs & 127); - hashwrite(pack_file, hdr + pos, sizeof(hdr) - pos); - pack_size += sizeof(hdr) - pos; - } else { - e->depth = 0; - hdrlen = encode_in_pack_object_header(hdr, sizeof(hdr), - type, dat->len); - hashwrite(pack_file, hdr, hdrlen); - pack_size += hdrlen; - } - - hashwrite(pack_file, out, s.total_out); - pack_size += s.total_out; - - e->idx.crc32 = crc32_end(pack_file); - - free(out); - free(delta); - if (last) { - if (last->no_swap) { - last->data = *dat; - } else { - strbuf_swap(&last->data, dat); - } - last->offset = e->idx.offset; - last->depth = e->depth; - } - return 0; -} - -static void truncate_pack(struct hashfile_checkpoint *checkpoint) -{ - if (hashfile_truncate(pack_file, checkpoint)) - die_errno("cannot truncate pack to skip duplicate"); - pack_size = checkpoint->offset; -} - -static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) -{ - size_t in_sz = 64 * 1024, out_sz = 64 * 1024; - unsigned char *in_buf = xmalloc(in_sz); - unsigned char *out_buf = xmalloc(out_sz); - struct object_entry *e; - struct object_id oid; - unsigned long hdrlen; - off_t offset; - git_hash_ctx c; - git_zstream s; - struct hashfile_checkpoint checkpoint; - int status = Z_OK; - - /* Determine if we should auto-checkpoint. */ - if ((max_packsize - && (pack_size + PACK_SIZE_THRESHOLD + len) > max_packsize) - || (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size) - cycle_packfile(); - - hashfile_checkpoint(pack_file, &checkpoint); - offset = checkpoint.offset; - - hdrlen = xsnprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1; - - the_hash_algo->init_fn(&c); - the_hash_algo->update_fn(&c, out_buf, hdrlen); - - crc32_begin(pack_file); - - git_deflate_init(&s, pack_compression_level); - - hdrlen = encode_in_pack_object_header(out_buf, out_sz, OBJ_BLOB, len); - - s.next_out = out_buf + hdrlen; - s.avail_out = out_sz - hdrlen; - - while (status != Z_STREAM_END) { - if (0 < len && !s.avail_in) { - size_t cnt = in_sz < len ? in_sz : (size_t)len; - size_t n = fread(in_buf, 1, cnt, stdin); - if (!n && feof(stdin)) - die("EOF in data (%" PRIuMAX " bytes remaining)", len); - - the_hash_algo->update_fn(&c, in_buf, n); - s.next_in = in_buf; - s.avail_in = n; - len -= n; - } - - status = git_deflate(&s, len ? 0 : Z_FINISH); - - if (!s.avail_out || status == Z_STREAM_END) { - size_t n = s.next_out - out_buf; - hashwrite(pack_file, out_buf, n); - pack_size += n; - s.next_out = out_buf; - s.avail_out = out_sz; - } - - switch (status) { - case Z_OK: - case Z_BUF_ERROR: - case Z_STREAM_END: - continue; - default: - die("unexpected deflate failure: %d", status); - } - } - git_deflate_end(&s); - the_hash_algo->final_fn(oid.hash, &c); - - if (oidout) - oidcpy(oidout, &oid); - - e = insert_object(&oid); - - if (mark) - insert_mark(marks, mark, e); - - if (e->idx.offset) { - duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(&checkpoint); - - } else if (find_sha1_pack(oid.hash, - get_all_packs(the_repository))) { - e->type = OBJ_BLOB; - e->pack_id = MAX_PACK_ID; - e->idx.offset = 1; /* just not zero! */ - duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(&checkpoint); - - } else { - e->depth = 0; - e->type = OBJ_BLOB; - e->pack_id = pack_id; - e->idx.offset = offset; - e->idx.crc32 = crc32_end(pack_file); - object_count++; - object_count_by_type[OBJ_BLOB]++; - } - - free(in_buf); - free(out_buf); -} - -/* All calls must be guarded by find_object() or find_mark() to - * ensure the 'struct object_entry' passed was written by this - * process instance. We unpack the entry by the offset, avoiding - * the need for the corresponding .idx file. This unpacking rule - * works because we only use OBJ_REF_DELTA within the packfiles - * created by fast-import. - * - * oe must not be NULL. Such an oe u |