summaryrefslogtreecommitdiff
path: root/builtin/fast-import.c
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2020-09-03 12:37:02 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2020-09-03 12:37:02 -0700
commitafd49c39dd0089ab887f77be0a8a5a2fc5054cfa (patch)
treecc12a024697351c99565d2b1a9cc127d3c26df99 /builtin/fast-import.c
parentMerge branch 'pw/add-p-allowed-options-fix' (diff)
parentdrop vcs-svn experiment (diff)
downloadtgif-afd49c39dd0089ab887f77be0a8a5a2fc5054cfa.tar.xz
Merge branch 'jk/slimmed-down'
Trim an unused binary and turn a bunch of commands into built-in.

* jk/slimmed-down:
  drop vcs-svn experiment
  make git-fast-import a builtin
  make git-bugreport a builtin
  make credential helpers builtins
  Makefile: drop builtins from MSVC pdb list
Diffstat (limited to 'builtin/fast-import.c')
-rw-r--r--builtin/fast-import.c3640
1 files changed, 3640 insertions, 0 deletions
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
new file mode 100644
index 0000000000..1c85eafe43
--- /dev/null
+++ b/builtin/fast-import.c
@@ -0,0 +1,3640 @@
+#include "builtin.h"
+#include "cache.h"
+#include "repository.h"
+#include "config.h"
+#include "lockfile.h"
+#include "object.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
+#include "delta.h"
+#include "pack.h"
+#include "refs.h"
+#include "csum-file.h"
+#include "quote.h"
+#include "dir.h"
+#include "run-command.h"
+#include "packfile.h"
+#include "object-store.h"
+#include "mem-pool.h"
+#include "commit-reach.h"
+#include "khash.h"
+
+/*
+ * Widths of the pack_id and depth bitfields (see struct object_entry and
+ * struct branch), and the largest value each can hold.
+ *
+ * MAX_PACK_ID doubles as a marker in this file: branches start out with
+ * it, invalidate_pack_id() rewrites matching ids to it, and objects that
+ * were found in a pre-existing pack are tagged with it.
+ */
+#define PACK_ID_BITS 16
+#define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
+#define DEPTH_BITS 13
+#define MAX_DEPTH ((1<<DEPTH_BITS)-1)
+
+/*
+ * We abuse the setuid bit on directories to mean "do not delta".
+ */
+#define NO_DELTA S_ISUID
+
+/*
+ * The amount of additional space required in order to write an object into the
+ * current pack. This covers the hash lengths at the end of the pack, plus the
+ * length of one object ID; it evaluates to three raw hash lengths of the
+ * active hash algorithm.
+ */
+#define PACK_SIZE_THRESHOLD (the_hash_algo->rawsz * 3)
+
+/*
+ * One object known to this process.  Entries are carved out of
+ * object_entry_pool blocks and indexed by object ID through object_table.
+ */
+struct object_entry {
+ /* object ID, pack offset and crc32; an offset of 0 means "not stored yet" */
+ struct pack_idx_entry idx;
+ /* linkage for the object_table hashmap */
+ struct hashmap_entry ent;
+ /* object type, owning pack number (or MAX_PACK_ID) and delta depth */
+ uint32_t type : TYPE_BITS,
+ pack_id : PACK_ID_BITS,
+ depth : DEPTH_BITS;
+};
+
+/*
+ * Comparison callback with the hashmap signature for entries embedded in
+ * struct object_entry (presumably installed on object_table; the
+ * registration is not part of this function).
+ *
+ * eptr is always treated as an embedded entry.  If keydata is non-NULL it
+ * is used as the struct object_id to compare against; otherwise
+ * entry_or_key is treated as a second embedded entry.  Returns the
+ * oidcmp() result, so 0 means the object IDs are equal.  map_data is not
+ * used.
+ */
+static int object_entry_hashcmp(const void *map_data,
+				const struct hashmap_entry *eptr,
+				const struct hashmap_entry *entry_or_key,
+				const void *keydata)
+{
+	const struct object_entry *stored =
+		container_of(eptr, const struct object_entry, ent);
+	const struct object_id *other = keydata;
+
+	if (!other) {
+		const struct object_entry *probe =
+			container_of(entry_or_key, const struct object_entry, ent);
+		other = &probe->idx.oid;
+	}
+	return oidcmp(&stored->idx.oid, other);
+}
+
+/*
+ * A slab of object_entry structs.  Slabs are chained through next_pool
+ * with the newest one at the head (the global "blocks").
+ */
+struct object_entry_pool {
+ struct object_entry_pool *next_pool;
+ /* next unused slot in entries[] */
+ struct object_entry *next_free;
+ /* one past the last slot in entries[] */
+ struct object_entry *end;
+ struct object_entry entries[FLEX_ARRAY]; /* more */
+};
+
+/*
+ * One node of the mark-number trie; each node has 1024 slots.
+ *
+ * When shift is non-zero the node is interior and data.sets[] holds child
+ * nodes selected by (idnum >> shift).  When shift is zero the node is a
+ * leaf and data.marked[] holds the object entries themselves.
+ * data.oids is not used by the code visible here -- presumably a leaf
+ * variant holding bare object IDs; confirm against its users.
+ */
+struct mark_set {
+ union {
+ struct object_id *oids[1024];
+ struct object_entry *marked[1024];
+ struct mark_set *sets[1024];
+ } data;
+ unsigned int shift;
+};
+
+/*
+ * The most recently stored object of some kind, kept so that the next
+ * object can be written as a delta against it (see store_object()).
+ */
+struct last_object {
+ /* content of the previous object; the delta base */
+ struct strbuf data;
+ /* pack offset of the previous object */
+ off_t offset;
+ /* delta chain depth of the previous object */
+ unsigned int depth;
+ /* if set, store_object() copies the caller's strbuf instead of swapping */
+ unsigned no_swap : 1;
+};
+
+/*
+ * An interned string (see to_atom()).  Atoms with the same hash bucket
+ * in atom_table are chained through next_atom.
+ */
+struct atom_str {
+ struct atom_str *next_atom;
+ /* length of str_dat, not counting the terminating NUL */
+ unsigned short str_len;
+ char str_dat[FLEX_ARRAY]; /* more */
+};
+
+struct tree_content;
+/*
+ * One entry of an in-memory tree.  While an entry sits on the free list
+ * its first pointer-sized word is reused as the "next free" link (see
+ * new_tree_entry() / release_tree_entry()).
+ */
+struct tree_entry {
+ /* loaded child entries, or NULL */
+ struct tree_content *tree;
+ struct atom_str *name;
+ /*
+  * Two (mode, oid) snapshots.  For a branch root, the crash report
+  * labels versions[0] "old tree" and versions[1] "cur tree".
+  */
+ struct tree_entry_ms {
+ uint16_t mode;
+ struct object_id oid;
+ } versions[2];
+};
+
+/*
+ * The entries of one in-memory tree.  A released tree_content is
+ * reinterpreted as struct avail_tree_content, which is why entry_capacity
+ * must stay the first member of both structs.
+ */
+struct tree_content {
+ unsigned int entry_capacity; /* must match avail_tree_content */
+ /* number of slots of entries[] currently in use */
+ unsigned int entry_count;
+ unsigned int delta_depth;
+ struct tree_entry *entries[FLEX_ARRAY]; /* more */
+};
+
+/*
+ * Free-list view of a released struct tree_content: the capacity is kept
+ * and the following bytes are reused as the link to the next free chunk.
+ */
+struct avail_tree_content {
+ unsigned int entry_capacity; /* must match tree_content */
+ struct avail_tree_content *next_avail;
+};
+
+/* Per-branch state, kept in the branch_table hash table. */
+struct branch {
+ /* next branch in the same branch_table bucket */
+ struct branch *table_next_branch;
+ /* next branch in the active_branches list */
+ struct branch *active_next_branch;
+ const char *name;
+ /* root of the branch's tree */
+ struct tree_entry branch_tree;
+ /* printed as the "commit clock" in crash reports */
+ uintmax_t last_commit;
+ uintmax_t num_notes;
+ unsigned active : 1;
+ unsigned delete : 1;
+ /* pack number associated with the branch, or MAX_PACK_ID */
+ unsigned pack_id : PACK_ID_BITS;
+ /* tip commit */
+ struct object_id oid;
+};
+
+/* An annotated tag, kept in a singly linked list starting at first_tag. */
+struct tag {
+ struct tag *next_tag;
+ const char *name;
+ /* pack number associated with the tag, or MAX_PACK_ID */
+ unsigned int pack_id;
+ struct object_id oid;
+};
+
+/* Node of a singly linked list of object IDs. */
+struct hash_list {
+ struct hash_list *next;
+ struct object_id oid;
+};
+
+/*
+ * Selector stored in the "whenspec" variable, which defaults to
+ * WHENSPEC_RAW.  NOTE(review): presumably the accepted date format of
+ * the input stream; the code that interprets it is outside this section.
+ */
+typedef enum {
+ WHENSPEC_RAW = 1,
+ WHENSPEC_RAW_PERMISSIVE,
+ WHENSPEC_RFC2822,
+ WHENSPEC_NOW
+} whenspec_type;
+
+/*
+ * Node of the doubly linked command history.  The list is circular
+ * around the cmd_hist sentinel; write_crash_report() walks it via next
+ * and prints each buf.
+ */
+struct recent_command {
+ struct recent_command *prev;
+ struct recent_command *next;
+ char *buf;
+};
+
+/* Callback that records object ID "oid" under number "mark" in set "s". */
+typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
+/* Callback for for_each_mark(): mark number, stored pointer, caller data. */
+typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp);
+
+/* Configured limits on output */
+/* store_object() only attempts a delta while the base's depth is below this */
+static unsigned long max_depth = 50;
+/* when non-zero, the pack is cycled before an object would push it past this size */
+static off_t max_packsize;
+/* end_packfile() unpacks into loose objects when the pack has at most this many objects */
+static int unpack_limit = 100;
+static int force_update;
+
+/* Stats and misc. counters */
+static uintmax_t alloc_count;
+static uintmax_t marks_set_count;
+static uintmax_t object_count_by_type[1 << TYPE_BITS];
+static uintmax_t duplicate_count_by_type[1 << TYPE_BITS];
+static uintmax_t delta_count_by_type[1 << TYPE_BITS];
+static uintmax_t delta_count_attempts_by_type[1 << TYPE_BITS];
+/* objects written to the pack currently being generated */
+static unsigned long object_count;
+static unsigned long branch_count;
+static unsigned long branch_load_count;
+static int failure;
+static FILE *pack_edges;
+/* when zero, loosen_small_pack() passes -q to unpack-objects */
+static unsigned int show_stats = 1;
+static int global_argc;
+static const char **global_argv;
+
+/* Memory pools */
+static struct mem_pool fi_mem_pool = {NULL, 2*1024*1024 -
+ sizeof(struct mp_block), 0 };
+
+/* Atom management */
+static unsigned int atom_table_sz = 4451;
+static unsigned int atom_cnt;
+static struct atom_str **atom_table;
+
+/* The .pack file being generated */
+static struct pack_idx_option pack_idx_opts;
+/* number of the pack being generated; also its index in all_packs */
+static unsigned int pack_id;
+static struct hashfile *pack_file;
+static struct packed_git *pack_data;
+static struct packed_git **all_packs;
+/* bytes written to the current pack so far, header included */
+static off_t pack_size;
+
+/* Table of objects we've written. */
+static unsigned int object_entry_alloc = 5000;
+/* head of the object_entry_pool chain; new slots come from this pool */
+static struct object_entry_pool *blocks;
+static struct hashmap object_table;
+static struct mark_set *marks;
+static const char *export_marks_file;
+static const char *import_marks_file;
+static int import_marks_file_from_stream;
+static int import_marks_file_ignore_missing;
+static int import_marks_file_done;
+static int relative_marks_paths;
+
+/* Our last blob */
+static struct last_object last_blob = { STRBUF_INIT, 0, 0, 0 };
+
+/* Tree management */
+static unsigned int tree_entry_alloc = 1000;
+/* head of the free list of struct tree_entry */
+static void *avail_tree_entry;
+static unsigned int avail_tree_table_sz = 100;
+/* free lists of released tree_content chunks, bucketed by hc_entries() */
+static struct avail_tree_content **avail_tree_table;
+static size_t tree_entry_allocd;
+static struct strbuf old_tree = STRBUF_INIT;
+static struct strbuf new_tree = STRBUF_INIT;
+
+/* Branch data */
+static unsigned long max_active_branches = 5;
+static unsigned long cur_active_branches;
+static unsigned long branch_table_sz = 1039;
+static struct branch **branch_table;
+static struct branch *active_branches;
+
+/* Tag data */
+static struct tag *first_tag;
+static struct tag *last_tag;
+
+/* Input stream parsing */
+static whenspec_type whenspec = WHENSPEC_RAW;
+static struct strbuf command_buf = STRBUF_INIT;
+static int unread_command_buf;
+/* sentinel of the circular command history list */
+static struct recent_command cmd_hist = {&cmd_hist, &cmd_hist, NULL};
+static struct recent_command *cmd_tail = &cmd_hist;
+static struct recent_command *rc_free;
+static unsigned int cmd_save = 100;
+static uintmax_t next_mark;
+static struct strbuf new_data = STRBUF_INIT;
+static int seen_data_command;
+static int require_explicit_termination;
+static int allow_unsafe_features;
+
+/* Signal handling */
+/* set from the SIGUSR1 handler; nothing else happens in signal context */
+static volatile sig_atomic_t checkpoint_requested;
+
+/* Submodule marks */
+static struct string_list sub_marks_from = STRING_LIST_INIT_DUP;
+static struct string_list sub_marks_to = STRING_LIST_INIT_DUP;
+static kh_oid_map_t *sub_oid_map;
+
+/* Where to write output of cat-blob commands */
+static int cat_blob_fd = STDOUT_FILENO;
+
+/* Forward declarations for functions defined later in the file */
+static void parse_argv(void);
+static void parse_get_mark(const char *p);
+static void parse_cat_blob(const char *p);
+static void parse_ls(const char *p, struct branch *b);
+
+/*
+ * Visit every populated mark below node "m" in ascending slot order.
+ *
+ * "base" is the mark number represented by slot 0 of this node.  For a
+ * leaf (shift == 0) the callback receives base + slot, the stored
+ * pointer and "p"; for an interior node the walk recurses into each
+ * non-NULL child with the base advanced by slot << shift.
+ */
+static void for_each_mark(struct mark_set *m, uintmax_t base, each_mark_fn_t callback, void *p)
+{
+	const int is_leaf = !m->shift;
+	uintmax_t slot;
+
+	for (slot = 0; slot < 1024; slot++) {
+		if (is_leaf) {
+			struct object_entry *oe = m->data.marked[slot];
+
+			if (oe)
+				callback(base + slot, oe, p);
+		} else {
+			struct mark_set *child = m->data.sets[slot];
+
+			if (child)
+				for_each_mark(child, base + (slot << m->shift),
+					      callback, p);
+		}
+	}
+}
+
+/*
+ * for_each_mark() callback: write one ":<mark> <hex oid>" line.
+ * "object" is the struct object_entry for the mark and "cbp" is the
+ * FILE to write to.
+ */
+static void dump_marks_fn(uintmax_t mark, void *object, void *cbp)
+{
+	const struct object_entry *entry = object;
+	FILE *out = cbp;
+
+	fprintf(out, ":%" PRIuMAX " %s\n", mark, oid_to_hex(&entry->idx.oid));
+}
+
+/*
+ * Append a human-readable description of branch "b" to the crash report
+ * "rpt": status flags, tip commit, old/current root tree, commit clock
+ * and, unless the branch's pack_id is MAX_PACK_ID, its pack number.
+ */
+static void write_branch_report(FILE *rpt, struct branch *b)
+{
+	const struct tree_entry *root = &b->branch_tree;
+	const struct object_id *old_oid = &root->versions[0].oid;
+	const struct object_id *cur_oid = &root->versions[1].oid;
+
+	fprintf(rpt, "%s:\n", b->name);
+
+	fprintf(rpt, " status :");
+	if (b->active)
+		fputs(" active", rpt);
+	if (root->tree)
+		fputs(" loaded", rpt);
+	/* a null current-tree oid is what the report calls "dirty" */
+	if (is_null_oid(cur_oid))
+		fputs(" dirty", rpt);
+	fputc('\n', rpt);
+
+	fprintf(rpt, " tip commit : %s\n", oid_to_hex(&b->oid));
+	fprintf(rpt, " old tree : %s\n", oid_to_hex(old_oid));
+	fprintf(rpt, " cur tree : %s\n", oid_to_hex(cur_oid));
+	fprintf(rpt, " commit clock: %" PRIuMAX "\n", b->last_commit);
+
+	fputs(" last pack : ", rpt);
+	if (b->pack_id < MAX_PACK_ID)
+		fprintf(rpt, "%u", b->pack_id);
+	/* end of the line, then a blank separator line */
+	fputs("\n\n", rpt);
+}
+
+/*
+ * Write a crash report describing the failure message "err" and the
+ * importer's state to a file named fast_import_crash_<pid> (path built by
+ * git_pathdup()).  The report lists the recent command history, the
+ * active-branch list, every branch in branch_table, the annotated tags
+ * and the marks.  If the file cannot be opened, an error is reported and
+ * nothing else happens.
+ */
+static void write_crash_report(const char *err)
+{
+ char *loc = git_pathdup("fast_import_crash_%"PRIuMAX, (uintmax_t) getpid());
+ FILE *rpt = fopen(loc, "w");
+ struct branch *b;
+ unsigned long lu;
+ struct recent_command *rc;
+
+ if (!rpt) {
+ error_errno("can't write crash report %s", loc);
+ free(loc);
+ return;
+ }
+
+ fprintf(stderr, "fast-import: dumping crash report to %s\n", loc);
+
+ fprintf(rpt, "fast-import crash report:\n");
+ fprintf(rpt, " fast-import process: %"PRIuMAX"\n", (uintmax_t) getpid());
+ fprintf(rpt, " parent process : %"PRIuMAX"\n", (uintmax_t) getppid());
+ fprintf(rpt, " at %s\n", show_date(time(NULL), 0, DATE_MODE(ISO8601)));
+ fputc('\n', rpt);
+
+ fputs("fatal: ", rpt);
+ fputs(err, rpt);
+ fputc('\n', rpt);
+
+ fputc('\n', rpt);
+ fputs("Most Recent Commands Before Crash\n", rpt);
+ fputs("---------------------------------\n", rpt);
+ /* walk the circular history; the last entry before the sentinel gets a "*" */
+ for (rc = cmd_hist.next; rc != &cmd_hist; rc = rc->next) {
+ if (rc->next == &cmd_hist)
+ fputs("* ", rpt);
+ else
+ fputs(" ", rpt);
+ fputs(rc->buf, rpt);
+ fputc('\n', rpt);
+ }
+
+ fputc('\n', rpt);
+ fputs("Active Branch LRU\n", rpt);
+ fputs("-----------------\n", rpt);
+ fprintf(rpt, " active_branches = %lu cur, %lu max\n",
+ cur_active_branches,
+ max_active_branches);
+ fputc('\n', rpt);
+ fputs(" pos clock name\n", rpt);
+ fputs(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", rpt);
+ for (b = active_branches, lu = 0; b; b = b->active_next_branch)
+ fprintf(rpt, " %2lu) %6" PRIuMAX" %s\n",
+ ++lu, b->last_commit, b->name);
+
+ fputc('\n', rpt);
+ fputs("Inactive Branches\n", rpt);
+ fputs("-----------------\n", rpt);
+ /* despite the heading, this visits every branch in branch_table */
+ for (lu = 0; lu < branch_table_sz; lu++) {
+ for (b = branch_table[lu]; b; b = b->table_next_branch)
+ write_branch_report(rpt, b);
+ }
+
+ if (first_tag) {
+ struct tag *tg;
+ fputc('\n', rpt);
+ fputs("Annotated Tags\n", rpt);
+ fputs("--------------\n", rpt);
+ for (tg = first_tag; tg; tg = tg->next_tag) {
+ fputs(oid_to_hex(&tg->oid), rpt);
+ fputc(' ', rpt);
+ fputs(tg->name, rpt);
+ fputc('\n', rpt);
+ }
+ }
+
+ fputc('\n', rpt);
+ fputs("Marks\n", rpt);
+ fputs("-----\n", rpt);
+ /* marks are only inlined when no export file is configured */
+ if (export_marks_file)
+ fprintf(rpt, " exported to %s\n", export_marks_file);
+ else
+ for_each_mark(marks, 0, dump_marks_fn, rpt);
+
+ fputc('\n', rpt);
+ fputs("-------------------\n", rpt);
+ fputs("END OF CRASH REPORT\n", rpt);
+ fclose(rpt);
+ free(loc);
+}
+
+static void end_packfile(void);
+static void unkeep_all_packs(void);
+static void dump_marks(void);
+
+/*
+ * Fatal-error routine: format the message, print it to stderr prefixed
+ * with "fatal: ", then, the first time through only, write a crash
+ * report, close out the current pack, remove the .keep files and dump
+ * the marks.  Always exits with status 128.
+ *
+ * The static "zombie" flag ensures the cleanup sequence runs at most
+ * once, so a failure inside the cleanup goes straight to exit.
+ */
+static NORETURN void die_nicely(const char *err, va_list params)
+{
+	static int zombie;
+	char message[2 * PATH_MAX];
+
+	vsnprintf(message, sizeof(message), err, params);
+	fputs("fatal: ", stderr);
+	fputs(message, stderr);
+	fputc('\n', stderr);
+
+	if (zombie)
+		exit(128);
+
+	zombie = 1;
+	write_crash_report(message);
+	end_packfile();
+	unkeep_all_packs();
+	dump_marks();
+	exit(128);
+}
+
+#ifdef SIGUSR1
+
+/* SIGUSR1 handler: only records that a checkpoint was requested. */
+static void checkpoint_signal(int signo)
+{
+	checkpoint_requested = 1;
+}
+
+/*
+ * Install checkpoint_signal() for SIGUSR1 with an empty signal mask and
+ * SA_RESTART.
+ */
+static void set_checkpoint_signal(void)
+{
+	struct sigaction act;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_RESTART;
+	act.sa_handler = checkpoint_signal;
+	sigaction(SIGUSR1, &act, NULL);
+}
+
+#else /* no SIGUSR1: Windows, for example */
+
+/* Nothing to install on platforms without SIGUSR1. */
+static void set_checkpoint_signal(void)
+{
+}
+
+#endif
+
+/*
+ * Allocate a new pool with room for "cnt" object entries, make it the
+ * head of the "blocks" chain and add cnt to alloc_count.
+ */
+static void alloc_objects(unsigned int cnt)
+{
+	struct object_entry_pool *pool;
+
+	pool = xmalloc(sizeof(*pool) + cnt * sizeof(pool->entries[0]));
+	pool->next_free = pool->entries;
+	pool->end = pool->entries + cnt;
+	pool->next_pool = blocks;
+	blocks = pool;
+	alloc_count += cnt;
+}
+
+/*
+ * Take the next free slot from the head pool (allocating a fresh pool of
+ * object_entry_alloc slots first if the head pool is full), copy "oid"
+ * into it and return it.  Only the object ID is initialized here.
+ * Dereferences "blocks" unconditionally, so a pool must already exist.
+ */
+static struct object_entry *new_object(struct object_id *oid)
+{
+	struct object_entry *slot;
+
+	if (blocks->end == blocks->next_free)
+		alloc_objects(object_entry_alloc);
+
+	slot = blocks->next_free;
+	blocks->next_free = slot + 1;
+	oidcpy(&slot->idx.oid, oid);
+	return slot;
+}
+
+/*
+ * Look "oid" up in object_table and return the result of the hashmap
+ * lookup (the matching entry, if any).
+ */
+static struct object_entry *find_object(struct object_id *oid)
+{
+	const unsigned int hash = oidhash(oid);
+
+	return hashmap_get_entry_from_hash(&object_table, hash, oid,
+					   struct object_entry, ent);
+}
+
+/*
+ * Return the object_table entry for "oid", creating it if the lookup
+ * finds nothing.  A freshly created entry has idx.offset set to 0, which
+ * callers use to tell "new" from "already stored".
+ */
+static struct object_entry *insert_object(struct object_id *oid)
+{
+	const unsigned int hash = oidhash(oid);
+	struct object_entry *e;
+
+	e = hashmap_get_entry_from_hash(&object_table, hash, oid,
+					struct object_entry, ent);
+	if (e)
+		return e;
+
+	e = new_object(oid);
+	e->idx.offset = 0;
+	hashmap_entry_init(&e->ent, hash);
+	hashmap_add(&object_table, &e->ent);
+	return e;
+}
+
+/*
+ * Replace pack number "id" with MAX_PACK_ID in every object entry,
+ * branch and tag that currently carries it.
+ */
+static void invalidate_pack_id(unsigned int id)
+{
+	struct hashmap_iter iter;
+	struct object_entry *obj;
+	struct branch *br;
+	struct tag *tg;
+	unsigned long bucket;
+
+	hashmap_for_each_entry(&object_table, &iter, obj, ent) {
+		if (obj->pack_id == id)
+			obj->pack_id = MAX_PACK_ID;
+	}
+
+	for (bucket = 0; bucket < branch_table_sz; bucket++) {
+		for (br = branch_table[bucket]; br; br = br->table_next_branch) {
+			if (br->pack_id == id)
+				br->pack_id = MAX_PACK_ID;
+		}
+	}
+
+	for (tg = first_tag; tg; tg = tg->next_tag) {
+		if (tg->pack_id == id)
+			tg->pack_id = MAX_PACK_ID;
+	}
+}
+
+/*
+ * Hash the "len" bytes starting at "s" with the multiply-by-31 scheme
+ * (hash = hash * 31 + byte), starting from 0.  Embedded NULs are hashed
+ * like any other byte; an empty range hashes to 0.
+ */
+static unsigned int hc_str(const char *s, size_t len)
+{
+	const char *end = s + len;
+	unsigned int hash = 0;
+
+	for (; s != end; s++)
+		hash = hash * 31 + *s;
+	return hash;
+}
+
+/*
+ * Record that mark number "idnum" refers to object entry "oe".
+ *
+ * Marks are stored in a trie of struct mark_set nodes with 1024 slots
+ * each.  If idnum does not fit under the current root, new root levels
+ * are pushed on top until it does; the function then descends, creating
+ * missing interior nodes, and stores oe in the leaf slot.
+ * marks_set_count is bumped only when the slot was previously empty.
+ *
+ * NOTE(review): the grow loop reads marks->shift and reassigns the global
+ * "marks" rather than anything reachable from the caller's "s".  That is
+ * only coherent when the caller passed "marks" itself; for any other set
+ * the global trie is re-rooted and the caller's root pointer goes stale.
+ * A real fix needs the root passed by reference, which changes the
+ * signature -- confirm against all callers.
+ */
+static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe)
+{
+ /* grow: add root levels until idnum is addressable */
+ while ((idnum >> s->shift) >= 1024) {
+ s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
+ s->shift = marks->shift + 10;
+ s->data.sets[0] = marks;
+ marks = s;
+ }
+ /* descend: peel off 10 bits of idnum per level */
+ while (s->shift) {
+ uintmax_t i = idnum >> s->shift;
+ idnum -= i << s->shift;
+ if (!s->data.sets[i]) {
+ s->data.sets[i] = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
+ s->data.sets[i]->shift = s->shift - 10;
+ }
+ s = s->data.sets[i];
+ }
+ if (!s->data.marked[idnum])
+ marks_set_count++;
+ s->data.marked[idnum] = oe;
+}
+
+/*
+ * Return the pointer stored for mark "idnum" in the trie rooted at "s".
+ * Dies with "mark :<n> not declared" when the number is beyond what the
+ * trie can address, when a node on the path is missing, or when the leaf
+ * slot is empty.
+ */
+static void *find_mark(struct mark_set *s, uintmax_t idnum)
+{
+	const uintmax_t wanted = idnum;
+	struct object_entry *found = NULL;
+
+	/* too large for the current root: treat like a missing path */
+	if ((idnum >> s->shift) >= 1024)
+		s = NULL;
+
+	while (s && s->shift) {
+		uintmax_t slot = idnum >> s->shift;
+
+		idnum -= slot << s->shift;
+		s = s->data.sets[slot];
+	}
+	if (s)
+		found = s->data.marked[idnum];
+
+	if (!found)
+		die("mark :%" PRIuMAX " not declared", wanted);
+	return found;
+}
+
+/*
+ * Intern the "len" bytes at "s": return the existing atom with the same
+ * length and bytes if there is one, otherwise allocate a new
+ * NUL-terminated atom from fi_mem_pool, push it on the front of its
+ * atom_table bucket and count it in atom_cnt.
+ */
+static struct atom_str *to_atom(const char *s, unsigned short len)
+{
+	struct atom_str **bucket = &atom_table[hc_str(s, len) % atom_table_sz];
+	struct atom_str *atom;
+
+	for (atom = *bucket; atom; atom = atom->next_atom) {
+		if (atom->str_len != len)
+			continue;
+		if (!strncmp(s, atom->str_dat, len))
+			return atom;
+	}
+
+	atom = mem_pool_alloc(&fi_mem_pool, sizeof(struct atom_str) + len + 1);
+	atom->str_len = len;
+	memcpy(atom->str_dat, s, len);
+	atom->str_dat[len] = 0;
+	atom->next_atom = *bucket;
+	*bucket = atom;
+	atom_cnt++;
+	return atom;
+}
+
+/*
+ * Find the branch called "name" in branch_table.  Returns NULL when no
+ * branch with exactly that name exists.
+ */
+static struct branch *lookup_branch(const char *name)
+{
+	struct branch *b =
+		branch_table[hc_str(name, strlen(name)) % branch_table_sz];
+
+	while (b && strcmp(name, b->name))
+		b = b->table_next_branch;
+	return b;
+}
+
+/*
+ * Create a branch called "name" and add it to branch_table.
+ *
+ * Dies if a branch of that name already exists or if the name fails
+ * check_refname_format() with REFNAME_ALLOW_ONELEVEL.  The new branch is
+ * allocated from fi_mem_pool, starts inactive with both root tree
+ * versions marked as directories and pack_id set to MAX_PACK_ID, and is
+ * counted in branch_count.
+ */
+static struct branch *new_branch(const char *name)
+{
+	struct branch **bucket;
+	struct branch *b;
+
+	if (lookup_branch(name))
+		die("Invalid attempt to create duplicate branch: %s", name);
+	if (check_refname_format(name, REFNAME_ALLOW_ONELEVEL))
+		die("Branch name doesn't conform to GIT standards: %s", name);
+
+	bucket = &branch_table[hc_str(name, strlen(name)) % branch_table_sz];
+
+	b = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct branch));
+	b->name = mem_pool_strdup(&fi_mem_pool, name);
+	b->branch_tree.versions[0].mode = S_IFDIR;
+	b->branch_tree.versions[1].mode = S_IFDIR;
+	b->num_notes = 0;
+	b->active = 0;
+	b->pack_id = MAX_PACK_ID;
+
+	b->table_next_branch = *bucket;
+	*bucket = b;
+	branch_count++;
+	return b;
+}
+
+/*
+ * Map an entry count to an avail_tree_table bucket: the count divided by
+ * eight, rounded up, clamped to the last bucket.
+ */
+static unsigned int hc_entries(unsigned int cnt)
+{
+	unsigned int bucket = cnt / 8 + !!(cnt & 7);
+
+	if (bucket >= avail_tree_table_sz)
+		bucket = avail_tree_table_sz - 1;
+	return bucket;
+}
+
+/*
+ * Return an empty tree_content with room for at least "cnt" entries.
+ *
+ * The first chunk in the matching avail_tree_table bucket that is large
+ * enough is unlinked and reused; otherwise a new chunk is allocated from
+ * fi_mem_pool with the capacity rounded up to a multiple of eight.
+ * entry_count and delta_depth are reset to 0 either way.
+ */
+static struct tree_content *new_tree_content(unsigned int cnt)
+{
+	struct avail_tree_content **link = &avail_tree_table[hc_entries(cnt)];
+	struct avail_tree_content *chunk;
+	struct tree_content *t;
+
+	/* first-fit search of this bucket's free list */
+	while ((chunk = *link) && chunk->entry_capacity < cnt)
+		link = &chunk->next_avail;
+
+	if (chunk) {
+		*link = chunk->next_avail;
+	} else {
+		if (cnt & 7)
+			cnt = ((cnt / 8) + 1) * 8;
+		chunk = mem_pool_alloc(&fi_mem_pool,
+				       sizeof(*t) + sizeof(t->entries[0]) * cnt);
+		chunk->entry_capacity = cnt;
+	}
+
+	t = (struct tree_content *)chunk;
+	t->entry_count = 0;
+	t->delta_depth = 0;
+	return t;
+}
+
+static void release_tree_entry(struct tree_entry *e);
+
+/*
+ * Put "t" on the front of the free list for its capacity bucket.  The
+ * entries it points to are not touched; see
+ * release_tree_content_recursive() for that.
+ */
+static void release_tree_content(struct tree_content *t)
+{
+	struct avail_tree_content *recycled = (struct avail_tree_content *)t;
+	struct avail_tree_content **bucket =
+		&avail_tree_table[hc_entries(recycled->entry_capacity)];
+
+	recycled->next_avail = *bucket;
+	*bucket = recycled;
+}
+
+/*
+ * Release every entry held by "t" (which in turn releases their
+ * subtrees), then release "t" itself.
+ */
+static void release_tree_content_recursive(struct tree_content *t)
+{
+	struct tree_entry **cur = t->entries;
+	struct tree_entry **end = cur + t->entry_count;
+
+	while (cur != end)
+		release_tree_entry(*cur++);
+	release_tree_content(t);
+}
+
+/*
+ * Return a tree_content with room for "amt" more entries than "t"
+ * currently holds, carrying over t's entry pointers, entry_count and
+ * delta_depth.  "t" is released and must not be used afterwards.
+ */
+static struct tree_content *grow_tree_content(
+	struct tree_content *t,
+	int amt)
+{
+	const unsigned int used = t->entry_count;
+	struct tree_content *bigger = new_tree_content(used + amt);
+
+	COPY_ARRAY(bigger->entries, t->entries, used);
+	bigger->entry_count = used;
+	bigger->delta_depth = t->delta_depth;
+	release_tree_content(t);
+	return bigger;
+}
+
+/*
+ * Pop a tree_entry off the free list.  When the list is empty, a batch
+ * of tree_entry_alloc entries is allocated first, threaded into a list
+ * through each entry's first pointer-sized word, and accounted for in
+ * tree_entry_allocd.  The returned entry is not initialized.
+ */
+static struct tree_entry *new_tree_entry(void)
+{
+	struct tree_entry *e;
+
+	if (!avail_tree_entry) {
+		unsigned int total = tree_entry_alloc;
+		unsigned int i;
+
+		tree_entry_allocd += total * sizeof(struct tree_entry);
+		ALLOC_ARRAY(e, total);
+		avail_tree_entry = e;
+		/* link each entry to its successor; the last one ends the list */
+		for (i = 1; i < total; i++, e++)
+			*((void **)e) = e + 1;
+		*((void **)e) = NULL;
+	}
+
+	e = avail_tree_entry;
+	avail_tree_entry = *((void **)e);
+	return e;
+}
+
+/*
+ * Return "e" to the free list, first releasing its subtree (recursively)
+ * if it has one.  The entry's first word is overwritten with the free
+ * list link.
+ */
+static void release_tree_entry(struct tree_entry *e)
+{
+	struct tree_content *subtree = e->tree;
+
+	if (subtree)
+		release_tree_content_recursive(subtree);
+	*((void **)e) = avail_tree_entry;
+	avail_tree_entry = e;
+}
+
+/*
+ * Deep-copy "s" (NULL yields NULL).  Every entry is duplicated; a
+ * subtree is copied recursively only when the source entry has one
+ * loaded and its versions[1] oid is null, otherwise the copy's tree
+ * pointer is left NULL.
+ */
+static struct tree_content *dup_tree_content(struct tree_content *s)
+{
+	struct tree_content *copy;
+	unsigned int i;
+
+	if (!s)
+		return NULL;
+
+	copy = new_tree_content(s->entry_count);
+	for (i = 0; i < s->entry_count; i++) {
+		struct tree_entry *src = s->entries[i];
+		struct tree_entry *dst = new_tree_entry();
+		int needs_subtree;
+
+		memcpy(dst, src, sizeof(*src));
+		needs_subtree = src->tree && is_null_oid(&dst->versions[1].oid);
+		dst->tree = needs_subtree ? dup_tree_content(src->tree) : NULL;
+		copy->entries[i] = dst;
+	}
+	copy->entry_count = s->entry_count;
+	copy->delta_depth = s->delta_depth;
+
+	return copy;
+}
+
+/*
+ * Begin a new pack: create a temporary pack file, wrap it in a hashfile,
+ * write a version 2 pack header with an entry count of 0, and reset the
+ * per-pack state (pack_data, pack_size, object_count).  The new pack is
+ * also recorded in all_packs[pack_id].
+ */
+static void start_packfile(void)
+{
+	struct strbuf tmp_name = STRBUF_INIT;
+	struct pack_header header;
+	struct packed_git *pack;
+	int fd;
+
+	fd = odb_mkstemp(&tmp_name, "pack/tmp_pack_XXXXXX");
+	FLEX_ALLOC_STR(pack, pack_name, tmp_name.buf);
+	strbuf_release(&tmp_name);
+
+	pack->pack_fd = fd;
+	pack->do_not_close = 1;
+	pack_file = hashfd(fd, pack->pack_name);
+
+	header.hdr_signature = htonl(PACK_SIGNATURE);
+	header.hdr_version = htonl(2);
+	header.hdr_entries = 0;
+	hashwrite(pack_file, &header, sizeof(header));
+
+	pack_size = sizeof(header);
+	object_count = 0;
+	pack_data = pack;
+
+	REALLOC_ARRAY(all_packs, pack_id + 1);
+	all_packs[pack_id] = pack;
+}
+
+/*
+ * Write the index for the current pack and return the name reported by
+ * write_idx_file().
+ *
+ * Collects the idx entry of every object whose pack_id equals the
+ * current pack_id, walking each pool from its newest slot down to its
+ * first.  Dies if the number collected differs from object_count.
+ */
+static const char *create_index(void)
+{
+	struct pack_idx_entry **table, **fill;
+	struct object_entry_pool *pool;
+	const char *idx_tmp;
+
+	/* Build the table of object IDs. */
+	ALLOC_ARRAY(table, object_count);
+	fill = table;
+	for (pool = blocks; pool; pool = pool->next_pool) {
+		struct object_entry *e = pool->next_free;
+
+		while (e != pool->entries) {
+			e--;
+			if (pack_id == e->pack_id)
+				*fill++ = &e->idx;
+		}
+	}
+	if (fill != table + object_count)
+		die("internal consistency error creating the index");
+
+	idx_tmp = write_idx_file(NULL, table, object_count, &pack_idx_opts,
+				 pack_data->hash);
+	free(table);
+	return idx_tmp;
+}
+
+/*
+ * Move the finished pack and its index into place.
+ *
+ * Creates the pack's .keep file containing "fast-import", then finalizes
+ * the temporary pack and the index "curr_index_name" under their final
+ * names.  Dies on any failure.  Frees curr_index_name and returns the
+ * final index path as a detached string owned by the caller.
+ */
+static char *keep_pack(const char *curr_index_name)
+{
+	static const char *keep_msg = "fast-import";
+	struct strbuf path = STRBUF_INIT;
+	int fd;
+
+	/* the .keep file goes first, before the pack appears under its final name */
+	odb_pack_name(&path, pack_data->hash, "keep");
+	fd = odb_pack_keep(path.buf);
+	if (fd < 0)
+		die_errno("cannot create keep file");
+	write_or_die(fd, keep_msg, strlen(keep_msg));
+	if (close(fd))
+		die_errno("failed to write keep file");
+
+	odb_pack_name(&path, pack_data->hash, "pack");
+	if (finalize_object_file(pack_data->pack_name, path.buf))
+		die("cannot store pack file");
+
+	odb_pack_name(&path, pack_data->hash, "idx");
+	if (finalize_object_file(curr_index_name, path.buf))
+		die("cannot store index file");
+
+	free((void *)curr_index_name);
+	return strbuf_detach(&path, NULL);
+}
+
+/*
+ * Remove the .keep file of every pack numbered below the current
+ * pack_id, warning (not dying) when an unlink fails.
+ */
+static void unkeep_all_packs(void)
+{
+	struct strbuf keep_path = STRBUF_INIT;
+	unsigned int idx;
+
+	for (idx = 0; idx < pack_id; idx++) {
+		odb_pack_name(&keep_path, all_packs[idx]->hash, "keep");
+		unlink_or_warn(keep_path.buf);
+	}
+	strbuf_release(&keep_path);
+}
+
+/*
+ * Feed pack "p" to "git unpack-objects" on its standard input, after
+ * rewinding the pack's file descriptor to the start.  The child's stdout
+ * is redirected to stderr and "-q" is passed when show_stats is off.
+ * Returns run_command()'s result; dies if the seek fails.
+ */
+static int loosen_small_pack(const struct packed_git *p)
+{
+	struct child_process unpack = CHILD_PROCESS_INIT;
+
+	if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
+		die_errno("Failed seeking to start of '%s'", p->pack_name);
+
+	strvec_push(&unpack.args, "unpack-objects");
+	if (!show_stats)
+		strvec_push(&unpack.args, "-q");
+	unpack.git_cmd = 1;
+	unpack.stdout_to_stderr = 1;
+	unpack.in = p->pack_fd;
+
+	return run_command(&unpack);
+}
+
+/*
+ * Finish the pack currently being written.
+ *
+ * With at least one object in it, the pack is finalized and then either
+ * unpacked into loose objects (when it holds at most unpack_limit objects
+ * and unpack-objects succeeds) or indexed, moved into place, registered
+ * and, if pack_edges is set, reported there.  An empty or successfully
+ * loosened pack is closed and unlinked instead.  In every case pack_data
+ * is freed and the last-blob delta base is dropped.
+ *
+ * Does nothing when no pack is open or when already running (the static
+ * "running" flag guards against re-entry).
+ */
+static void end_packfile(void)
+{
+ static int running;
+
+ if (running || !pack_data)
+ return;
+
+ running = 1;
+ clear_delta_base_cache();
+ if (object_count) {
+ struct packed_git *new_p;
+ struct object_id cur_pack_oid;
+ char *idx_name;
+ int i;
+ struct branch *b;
+ struct tag *t;
+
+ close_pack_windows(pack_data);
+ finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
+ fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash,
+ pack_data->pack_name, object_count,
+ cur_pack_oid.hash, pack_size);
+
+ if (object_count <= unpack_limit) {
+ /* a zero return means the objects are now loose: forget this pack */
+ if (!loosen_small_pack(pack_data)) {
+ invalidate_pack_id(pack_id);
+ goto discard_pack;
+ }
+ }
+
+ close(pack_data->pack_fd);
+ idx_name = keep_pack(create_index());
+
+ /* Register the packfile with core git's machinery. */
+ new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ if (!new_p)
+ die("core git rejected index %s", idx_name);
+ all_packs[pack_id] = new_p;
+ install_packed_git(the_repository, new_p);
+ free(idx_name);
+
+ /* Print the boundary */
+ if (pack_edges) {
+ fprintf(pack_edges, "%s:", new_p->pack_name);
+ for (i = 0; i < branch_table_sz; i++) {
+ for (b = branch_table[i]; b; b = b->table_next_branch) {
+ if (b->pack_id == pack_id)
+ fprintf(pack_edges, " %s",
+ oid_to_hex(&b->oid));
+ }
+ }
+ for (t = first_tag; t; t = t->next_tag) {
+ if (t->pack_id == pack_id)
+ fprintf(pack_edges, " %s",
+ oid_to_hex(&t->oid));
+ }
+ fputc('\n', pack_edges);
+ fflush(pack_edges);
+ }
+
+ /* only a pack that was kept consumes a pack number */
+ pack_id++;
+ }
+ else {
+ /* also reached by goto from the loosened-pack path above */
+discard_pack:
+ close(pack_data->pack_fd);
+ unlink_or_warn(pack_data->pack_name);
+ }
+ FREE_AND_NULL(pack_data);
+ running = 0;
+
+ /* We can't carry a delta across packfiles. */
+ strbuf_release(&last_blob.data);
+ last_blob.offset = 0;
+ last_blob.depth = 0;
+}
+
+/* Finish the current pack and immediately start a new one. */
+static void cycle_packfile(void)
+{
+ end_packfile();
+ start_packfile();
+}
+
+/*
+ * Store an in-memory object in the current pack.
+ *
+ *   type   - object type; it selects the header text and the counters.
+ *   dat    - object content.  On a successful write with "last" given,
+ *            its buffer is swapped with (or, with last->no_swap, shared
+ *            by) last->data.
+ *   last   - optional delta base from the previous store; updated to
+ *            describe this object when it is written.
+ *   oidout - if non-NULL, receives the computed object ID.
+ *   mark   - if non-zero, the mark number to bind to the object.
+ *
+ * Returns 1 when nothing was written because the object is already known
+ * to this process or exists in a pack of the repository, 0 when the
+ * object was written.  The mark is bound in both cases.
+ */
+static int store_object(
+ enum object_type type,
+ struct strbuf *dat,
+ struct last_object *last,
+ struct object_id *oidout,
+ uintmax_t mark)
+{
+ void *out, *delta;
+ struct object_entry *e;
+ unsigned char hdr[96];
+ struct object_id oid;
+ unsigned long hdrlen, deltalen;
+ git_hash_ctx c;
+ git_zstream s;
+
+ /* hash "<type> <len>\0" followed by the content; the +1 covers the NUL */
+ hdrlen = xsnprintf((char *)hdr, sizeof(hdr), "%s %lu",
+ type_name(type), (unsigned long)dat->len) + 1;
+ the_hash_algo->init_fn(&c);
+ the_hash_algo->update_fn(&c, hdr, hdrlen);
+ the_hash_algo->update_fn(&c, dat->buf, dat->len);
+ the_hash_algo->final_fn(oid.hash, &c);
+ if (oidout)
+ oidcpy(oidout, &oid);
+
+ e = insert_object(&oid);
+ if (mark)
+ insert_mark(marks, mark, e);
+ /* a non-zero offset means this process has already dealt with the object */
+ if (e->idx.offset) {
+ duplicate_count_by_type[type]++;
+ return 1;
+ } else if (find_sha1_pack(oid.hash,
+ get_all_packs(the_repository))) {
+ e->type = type;
+ e->pack_id = MAX_PACK_ID;
+ e->idx.offset = 1; /* just not zero! */
+ duplicate_count_by_type[type]++;
+ return 1;
+ }
+
+ /*
+  * Try a delta only with a usable base, below the depth limit, and for
+  * content longer than one raw hash; the delta is capped at the content
+  * length minus one raw hash.
+  */
+ if (last && last->data.len && last->data.buf && last->depth < max_depth
+ && dat->len > the_hash_algo->rawsz) {
+
+ delta_count_attempts_by_type[type]++;
+ delta = diff_delta(last->data.buf, last->data.len,
+ dat->buf, dat->len,
+ &deltalen, dat->len - the_hash_algo->rawsz);
+ } else
+ delta = NULL;
+
+ git_deflate_init(&s, pack_compression_level);
+ if (delta) {
+ s.next_in = delta;
+ s.avail_in = deltalen;
+ } else {
+ s.next_in = (void *)dat->buf;
+ s.avail_in = dat->len;
+ }
+ s.avail_out = git_deflate_bound(&s, s.avail_in);
+ s.next_out = out = xmalloc(s.avail_out);
+ while (git_deflate(&s, Z_FINISH) == Z_OK)
+ ; /* nothing */
+ git_deflate_end(&s);
+
+ /* Determine if we should auto-checkpoint. */
+ /* the second test catches the size computation wrapping around */
+ if ((max_packsize
+ && (pack_size + PACK_SIZE_THRESHOLD + s.total_out) > max_packsize)
+ || (pack_size + PACK_SIZE_THRESHOLD + s.total_out) < pack_size) {
+
+ /* This new object needs to *not* have the current pack_id. */
+ e->pack_id = pack_id + 1;
+ cycle_packfile();
+
+ /* We cannot carry a delta into the new pack. */
+ if (delta) {
+ FREE_AND_NULL(delta);
+
+ /* recompress the full content in place of the delta */
+ git_deflate_init(&s, pack_compression_level);
+ s.next_in = (void *)dat->buf;
+ s.avail_in = dat->len;
+ s.avail_out = git_deflate_bound(&s, s.avail_in);
+ s.next_out = out = xrealloc(out, s.avail_out);
+ while (git_deflate(&s, Z_FINISH) == Z_OK)
+ ; /* nothing */
+ git_deflate_end(&s);
+ }
+ }
+
+ e->type = type;
+ e->pack_id = pack_id;
+ e->idx.offset = pack_size;
+ object_count++;
+ object_count_by_type[type]++;
+
+ crc32_begin(pack_file);
+
+ if (delta) {
+ off_t ofs = e->idx.offset - last->offset;
+ unsigned pos = sizeof(hdr) - 1;
+
+ delta_count_by_type[type]++;
+ e->depth = last->depth + 1;
+
+ hdrlen = encode_in_pack_object_header(hdr, sizeof(hdr),
+ OBJ_OFS_DELTA, deltalen);
+ hashwrite(pack_file, hdr, hdrlen);
+ pack_size += hdrlen;
+
+ /*
+  * Encode the distance back to the base, 7 bits per byte, filling
+  * hdr[] from its end towards the front.
+  */
+ hdr[pos] = ofs & 127;
+ while (ofs >>= 7)
+ hdr[--pos] = 128 | (--ofs & 127);
+ hashwrite(pack_file, hdr + pos, sizeof(hdr) - pos);
+ pack_size += sizeof(hdr) - pos;
+ } else {
+ e->depth = 0;
+ hdrlen = encode_in_pack_object_header(hdr, sizeof(hdr),
+ type, dat->len);
+ hashwrite(pack_file, hdr, hdrlen);
+ pack_size += hdrlen;
+ }
+
+ hashwrite(pack_file, out, s.total_out);
+ pack_size += s.total_out;
+
+ e->idx.crc32 = crc32_end(pack_file);
+
+ free(out);
+ free(delta);
+ /* remember this object as the base for the next delta attempt */
+ if (last) {
+ if (last->no_swap) {
+ /* struct copy: last->data now refers to the caller's buffer */
+ last->data = *dat;
+ } else {
+ strbuf_swap(&last->data, dat);
+ }
+ last->offset = e->idx.offset;
+ last->depth = e->depth;
+ }
+ return 0;
+}
+
+/*
+ * Roll the pack file back to "checkpoint" and reset pack_size to the
+ * checkpoint's offset.  Dies if the truncation fails.
+ */
+static void truncate_pack(struct hashfile_checkpoint *checkpoint)
+{
+	int failed = hashfile_truncate(pack_file, checkpoint);
+
+	if (failed)
+		die_errno("cannot truncate pack to skip duplicate");
+	pack_size = checkpoint->offset;
+}
+
+/*
+ * Read a blob of exactly "len" bytes from stdin and stream it into the
+ * current pack without holding it in memory.
+ *
+ *   len    - number of content bytes to read from stdin.
+ *   oidout - if non-NULL, receives the blob's object ID.
+ *   mark   - if non-zero, the mark number to bind to the blob.
+ *
+ * The compressed data is written before the object ID is known.  If the
+ * blob turns out to be a duplicate (already handled by this process, or
+ * present in a pack of the repository), the pack is truncated back to
+ * the checkpoint taken before writing.  The blob is always stored whole,
+ * never as a delta.
+ *
+ * Dies on premature EOF, on a read error from stdin, or on an unexpected
+ * deflate status.
+ */
+static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
+{
+	size_t in_sz = 64 * 1024, out_sz = 64 * 1024;
+	unsigned char *in_buf = xmalloc(in_sz);
+	unsigned char *out_buf = xmalloc(out_sz);
+	struct object_entry *e;
+	struct object_id oid;
+	unsigned long hdrlen;
+	off_t offset;
+	git_hash_ctx c;
+	git_zstream s;
+	struct hashfile_checkpoint checkpoint;
+	int status = Z_OK;
+
+	/* Determine if we should auto-checkpoint. */
+	if ((max_packsize
+		&& (pack_size + PACK_SIZE_THRESHOLD + len) > max_packsize)
+		|| (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size)
+		cycle_packfile();
+
+	/* remember where we started so a duplicate can be rolled back */
+	hashfile_checkpoint(pack_file, &checkpoint);
+	offset = checkpoint.offset;
+
+	/* the object ID covers "blob <len>\0" plus the content */
+	hdrlen = xsnprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
+
+	the_hash_algo->init_fn(&c);
+	the_hash_algo->update_fn(&c, out_buf, hdrlen);
+
+	crc32_begin(pack_file);
+
+	git_deflate_init(&s, pack_compression_level);
+
+	/* the in-pack header shares out_buf with the first compressed bytes */
+	hdrlen = encode_in_pack_object_header(out_buf, out_sz, OBJ_BLOB, len);
+
+	s.next_out = out_buf + hdrlen;
+	s.avail_out = out_sz - hdrlen;
+
+	while (status != Z_STREAM_END) {
+		if (0 < len && !s.avail_in) {
+			size_t cnt = in_sz < len ? in_sz : (size_t)len;
+			size_t n = fread(in_buf, 1, cnt, stdin);
+
+			if (!n) {
+				if (feof(stdin))
+					die("EOF in data (%" PRIuMAX " bytes remaining)", len);
+				/*
+				 * A read error also yields 0 bytes; without
+				 * this check the loop would keep retrying
+				 * with no input and never terminate.
+				 */
+				if (ferror(stdin))
+					die_errno("error reading data from stdin");
+			}
+
+			the_hash_algo->update_fn(&c, in_buf, n);
+			s.next_in = in_buf;
+			s.avail_in = n;
+			len -= n;
+		}
+
+		status = git_deflate(&s, len ? 0 : Z_FINISH);
+
+		/* flush the output buffer when it is full or the stream is done */
+		if (!s.avail_out || status == Z_STREAM_END) {
+			size_t n = s.next_out - out_buf;
+			hashwrite(pack_file, out_buf, n);
+			pack_size += n;
+			s.next_out = out_buf;
+			s.avail_out = out_sz;
+		}
+
+		switch (status) {
+		case Z_OK:
+		case Z_BUF_ERROR:
+		case Z_STREAM_END:
+			continue;
+		default:
+			die("unexpected deflate failure: %d", status);
+		}
+	}
+	git_deflate_end(&s);
+	the_hash_algo->final_fn(oid.hash, &c);
+
+	if (oidout)
+		oidcpy(oidout, &oid);
+
+	e = insert_object(&oid);
+
+	if (mark)
+		insert_mark(marks, mark, e);
+
+	if (e->idx.offset) {
+		/* already handled by this process: drop what we just wrote */
+		duplicate_count_by_type[OBJ_BLOB]++;
+		truncate_pack(&checkpoint);
+
+	} else if (find_sha1_pack(oid.hash,
+				  get_all_packs(the_repository))) {
+		/* already in a pack of the repository: drop what we just wrote */
+		e->type = OBJ_BLOB;
+		e->pack_id = MAX_PACK_ID;
+		e->idx.offset = 1; /* just not zero! */
+		duplicate_count_by_type[OBJ_BLOB]++;
+		truncate_pack(&checkpoint);
+
+	} else {
+		e->depth = 0;
+		e->type = OBJ_BLOB;
+		e->pack_id = pack_id;
+		e->idx.offset = offset;
+		e->idx.crc32 = crc32_end(pack_file);
+		object_count++;
+		object_count_by_type[OBJ_BLOB]++;
+	}
+
+	free(in_buf);
+	free(out_buf);
+}
+
+/* All calls must be guarded by find_object() or find_mark() to
+ * ensure the 'struct object_entry' passed was written by this
+ * process instance. We unpack the entry by the offset, avoiding
+ * the need for the corresponding .idx file. This unpac