diff options
author | Junio C Hamano <gitster@pobox.com> | 2015-05-11 14:23:44 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2015-05-11 14:23:44 -0700 |
commit | eb10a850985fff2eb9a6810bc8fa35129f2b3b71 (patch) | |
tree | 2a5cddfd0c0ba5b9a99287149c304a750db3ee27 /builtin | |
parent | Merge branch 'jk/still-interesting' (diff) | |
parent | index-pack: kill union delta_base to save memory (diff) | |
download | tgif-eb10a850985fff2eb9a6810bc8fa35129f2b3b71.tar.xz |
Merge branch 'nd/slim-index-pack-memory-usage'
Memory usage of "git index-pack" has been trimmed by tens of
percent.
* nd/slim-index-pack-memory-usage:
index-pack: kill union delta_base to save memory
index-pack: reduce object_entry size to save memory
Diffstat (limited to 'builtin')
-rw-r--r-- | builtin/index-pack.c | 290 |
1 file changed, 179 insertions, 111 deletions
diff --git a/builtin/index-pack.c b/builtin/index-pack.c index cf654df09b..7ea2020d82 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -18,16 +18,14 @@ static const char index_pack_usage[] = struct object_entry { struct pack_idx_entry idx; unsigned long size; - unsigned int hdr_size; - enum object_type type; - enum object_type real_type; - unsigned delta_depth; - int base_object_no; + unsigned char hdr_size; + signed char type; + signed char real_type; }; -union delta_base { - unsigned char sha1[20]; - off_t offset; +struct object_stat { + unsigned delta_depth; + int base_object_no; }; struct base_data { @@ -49,25 +47,28 @@ struct thread_local { int pack_fd; }; -/* - * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want - * to memcmp() only the first 20 bytes. - */ -#define UNION_BASE_SZ 20 - #define FLAG_LINK (1u<<20) #define FLAG_CHECKED (1u<<21) -struct delta_entry { - union delta_base base; +struct ofs_delta_entry { + off_t offset; + int obj_no; +}; + +struct ref_delta_entry { + unsigned char sha1[20]; int obj_no; }; static struct object_entry *objects; -static struct delta_entry *deltas; +static struct object_stat *obj_stat; +static struct ofs_delta_entry *ofs_deltas; +static struct ref_delta_entry *ref_deltas; static struct thread_local nothread_data; static int nr_objects; -static int nr_deltas; +static int nr_ofs_deltas; +static int nr_ref_deltas; +static int ref_deltas_alloc; static int nr_resolved_deltas; static int nr_threads; @@ -476,7 +477,8 @@ static void *unpack_entry_data(unsigned long offset, unsigned long size, } static void *unpack_raw_entry(struct object_entry *obj, - union delta_base *delta_base, + off_t *ofs_offset, + unsigned char *ref_sha1, unsigned char *sha1) { unsigned char *p; @@ -505,11 +507,10 @@ static void *unpack_raw_entry(struct object_entry *obj, switch (obj->type) { case OBJ_REF_DELTA: - hashcpy(delta_base->sha1, fill(20)); + hashcpy(ref_sha1, fill(20)); use(20); break; case OBJ_OFS_DELTA: - 
memset(delta_base, 0, sizeof(*delta_base)); p = fill(1); c = *p; use(1); @@ -523,8 +524,8 @@ static void *unpack_raw_entry(struct object_entry *obj, use(1); base_offset = (base_offset << 7) + (c & 127); } - delta_base->offset = obj->idx.offset - base_offset; - if (delta_base->offset <= 0 || delta_base->offset >= obj->idx.offset) + *ofs_offset = obj->idx.offset - base_offset; + if (*ofs_offset <= 0 || *ofs_offset >= obj->idx.offset) bad_object(obj->idx.offset, _("delta base offset is out of bound")); break; case OBJ_COMMIT: @@ -608,55 +609,108 @@ static void *get_data_from_pack(struct object_entry *obj) return unpack_data(obj, NULL, NULL); } -static int compare_delta_bases(const union delta_base *base1, - const union delta_base *base2, - enum object_type type1, - enum object_type type2) +static int compare_ofs_delta_bases(off_t offset1, off_t offset2, + enum object_type type1, + enum object_type type2) { int cmp = type1 - type2; if (cmp) return cmp; - return memcmp(base1, base2, UNION_BASE_SZ); + return offset1 - offset2; } -static int find_delta(const union delta_base *base, enum object_type type) +static int find_ofs_delta(const off_t offset, enum object_type type) { - int first = 0, last = nr_deltas; - - while (first < last) { - int next = (first + last) / 2; - struct delta_entry *delta = &deltas[next]; - int cmp; - - cmp = compare_delta_bases(base, &delta->base, - type, objects[delta->obj_no].type); - if (!cmp) - return next; - if (cmp < 0) { - last = next; - continue; - } - first = next+1; - } - return -first-1; + int first = 0, last = nr_ofs_deltas; + + while (first < last) { + int next = (first + last) / 2; + struct ofs_delta_entry *delta = &ofs_deltas[next]; + int cmp; + + cmp = compare_ofs_delta_bases(offset, delta->offset, + type, objects[delta->obj_no].type); + if (!cmp) + return next; + if (cmp < 0) { + last = next; + continue; + } + first = next+1; + } + return -first-1; } -static void find_delta_children(const union delta_base *base, - int 
*first_index, int *last_index, - enum object_type type) +static void find_ofs_delta_children(off_t offset, + int *first_index, int *last_index, + enum object_type type) { - int first = find_delta(base, type); + int first = find_ofs_delta(offset, type); int last = first; - int end = nr_deltas - 1; + int end = nr_ofs_deltas - 1; if (first < 0) { *first_index = 0; *last_index = -1; return; } - while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ)) + while (first > 0 && ofs_deltas[first - 1].offset == offset) --first; - while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ)) + while (last < end && ofs_deltas[last + 1].offset == offset) + ++last; + *first_index = first; + *last_index = last; +} + +static int compare_ref_delta_bases(const unsigned char *sha1, + const unsigned char *sha2, + enum object_type type1, + enum object_type type2) +{ + int cmp = type1 - type2; + if (cmp) + return cmp; + return hashcmp(sha1, sha2); +} + +static int find_ref_delta(const unsigned char *sha1, enum object_type type) +{ + int first = 0, last = nr_ref_deltas; + + while (first < last) { + int next = (first + last) / 2; + struct ref_delta_entry *delta = &ref_deltas[next]; + int cmp; + + cmp = compare_ref_delta_bases(sha1, delta->sha1, + type, objects[delta->obj_no].type); + if (!cmp) + return next; + if (cmp < 0) { + last = next; + continue; + } + first = next+1; + } + return -first-1; +} + +static void find_ref_delta_children(const unsigned char *sha1, + int *first_index, int *last_index, + enum object_type type) +{ + int first = find_ref_delta(sha1, type); + int last = first; + int end = nr_ref_deltas - 1; + + if (first < 0) { + *first_index = 0; + *last_index = -1; + return; + } + while (first > 0 && !hashcmp(ref_deltas[first - 1].sha1, sha1)) + --first; + while (last < end && !hashcmp(ref_deltas[last + 1].sha1, sha1)) ++last; *first_index = first; *last_index = last; @@ -873,13 +927,15 @@ static void resolve_delta(struct object_entry *delta_obj, 
void *base_data, *delta_data; if (show_stat) { - delta_obj->delta_depth = base->obj->delta_depth + 1; + int i = delta_obj - objects; + int j = base->obj - objects; + obj_stat[i].delta_depth = obj_stat[j].delta_depth + 1; deepest_delta_lock(); - if (deepest_delta < delta_obj->delta_depth) - deepest_delta = delta_obj->delta_depth; + if (deepest_delta < obj_stat[i].delta_depth) + deepest_delta = obj_stat[i].delta_depth; deepest_delta_unlock(); + obj_stat[i].base_object_no = j; } - delta_obj->base_object_no = base->obj - objects; delta_data = get_data_from_pack(delta_obj); base_data = get_base_data(base); result->obj = delta_obj; @@ -902,7 +958,7 @@ static void resolve_delta(struct object_entry *delta_obj, * "want"; if so, swap in "set" and return true. Otherwise, leave it untouched * and return false. */ -static int compare_and_swap_type(enum object_type *type, +static int compare_and_swap_type(signed char *type, enum object_type want, enum object_type set) { @@ -921,16 +977,13 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, struct base_data *prev_base) { if (base->ref_last == -1 && base->ofs_last == -1) { - union delta_base base_spec; + find_ref_delta_children(base->obj->idx.sha1, + &base->ref_first, &base->ref_last, + OBJ_REF_DELTA); - hashcpy(base_spec.sha1, base->obj->idx.sha1); - find_delta_children(&base_spec, - &base->ref_first, &base->ref_last, OBJ_REF_DELTA); - - memset(&base_spec, 0, sizeof(base_spec)); - base_spec.offset = base->obj->idx.offset; - find_delta_children(&base_spec, - &base->ofs_first, &base->ofs_last, OBJ_OFS_DELTA); + find_ofs_delta_children(base->obj->idx.offset, + &base->ofs_first, &base->ofs_last, + OBJ_OFS_DELTA); if (base->ref_last == -1 && base->ofs_last == -1) { free(base->data); @@ -941,7 +994,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, } if (base->ref_first <= base->ref_last) { - struct object_entry *child = objects + deltas[base->ref_first].obj_no; + struct object_entry 
*child = objects + ref_deltas[base->ref_first].obj_no; struct base_data *result = alloc_base_data(); if (!compare_and_swap_type(&child->real_type, OBJ_REF_DELTA, @@ -957,7 +1010,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, } if (base->ofs_first <= base->ofs_last) { - struct object_entry *child = objects + deltas[base->ofs_first].obj_no; + struct object_entry *child = objects + ofs_deltas[base->ofs_first].obj_no; struct base_data *result = alloc_base_data(); assert(child->real_type == OBJ_OFS_DELTA); @@ -993,15 +1046,20 @@ static void find_unresolved_deltas(struct base_data *base) } } -static int compare_delta_entry(const void *a, const void *b) +static int compare_ofs_delta_entry(const void *a, const void *b) { - const struct delta_entry *delta_a = a; - const struct delta_entry *delta_b = b; + const struct ofs_delta_entry *delta_a = a; + const struct ofs_delta_entry *delta_b = b; - /* group by type (ref vs ofs) and then by value (sha-1 or offset) */ - return compare_delta_bases(&delta_a->base, &delta_b->base, - objects[delta_a->obj_no].type, - objects[delta_b->obj_no].type); + return delta_a->offset - delta_b->offset; +} + +static int compare_ref_delta_entry(const void *a, const void *b) +{ + const struct ref_delta_entry *delta_a = a; + const struct ref_delta_entry *delta_b = b; + + return hashcmp(delta_a->sha1, delta_b->sha1); } static void resolve_base(struct object_entry *obj) @@ -1047,7 +1105,8 @@ static void *threaded_second_pass(void *data) static void parse_pack_objects(unsigned char *sha1) { int i, nr_delays = 0; - struct delta_entry *delta = deltas; + struct ofs_delta_entry *ofs_delta = ofs_deltas; + unsigned char ref_delta_sha1[20]; struct stat st; if (verbose) @@ -1056,12 +1115,18 @@ static void parse_pack_objects(unsigned char *sha1) nr_objects); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - void *data = unpack_raw_entry(obj, &delta->base, obj->idx.sha1); + void *data = 
unpack_raw_entry(obj, &ofs_delta->offset, + ref_delta_sha1, obj->idx.sha1); obj->real_type = obj->type; - if (is_delta_type(obj->type)) { - nr_deltas++; - delta->obj_no = i; - delta++; + if (obj->type == OBJ_OFS_DELTA) { + nr_ofs_deltas++; + ofs_delta->obj_no = i; + ofs_delta++; + } else if (obj->type == OBJ_REF_DELTA) { + ALLOC_GROW(ref_deltas, nr_ref_deltas + 1, ref_deltas_alloc); + hashcpy(ref_deltas[nr_ref_deltas].sha1, ref_delta_sha1); + ref_deltas[nr_ref_deltas].obj_no = i; + nr_ref_deltas++; } else if (!data) { /* large blobs, check later */ obj->real_type = OBJ_BAD; @@ -1112,15 +1177,18 @@ static void resolve_deltas(void) { int i; - if (!nr_deltas) + if (!nr_ofs_deltas && !nr_ref_deltas) return; /* Sort deltas by base SHA1/offset for fast searching */ - qsort(deltas, nr_deltas, sizeof(struct delta_entry), - compare_delta_entry); + qsort(ofs_deltas, nr_ofs_deltas, sizeof(struct ofs_delta_entry), + compare_ofs_delta_entry); + qsort(ref_deltas, nr_ref_deltas, sizeof(struct ref_delta_entry), + compare_ref_delta_entry); if (verbose) - progress = start_progress(_("Resolving deltas"), nr_deltas); + progress = start_progress(_("Resolving deltas"), + nr_ref_deltas + nr_ofs_deltas); #ifndef NO_PTHREADS nr_dispatched = 0; @@ -1158,7 +1226,7 @@ static void resolve_deltas(void) static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved); static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned char *pack_sha1) { - if (nr_deltas == nr_resolved_deltas) { + if (nr_ref_deltas + nr_ofs_deltas == nr_resolved_deltas) { stop_progress(&progress); /* Flush remaining pack final 20-byte SHA1. 
*/ flush(); @@ -1169,7 +1237,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha struct sha1file *f; unsigned char read_sha1[20], tail_sha1[20]; struct strbuf msg = STRBUF_INIT; - int nr_unresolved = nr_deltas - nr_resolved_deltas; + int nr_unresolved = nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas; int nr_objects_initial = nr_objects; if (nr_unresolved <= 0) die(_("confusion beyond insanity")); @@ -1191,11 +1259,11 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha die(_("Unexpected tail checksum for %s " "(disk corruption?)"), curr_pack); } - if (nr_deltas != nr_resolved_deltas) + if (nr_ofs_deltas + nr_ref_deltas != nr_resolved_deltas) die(Q_("pack has %d unresolved delta", "pack has %d unresolved deltas", - nr_deltas - nr_resolved_deltas), - nr_deltas - nr_resolved_deltas); + nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas), + nr_ofs_deltas + nr_ref_deltas - nr_resolved_deltas); } static int write_compressed(struct sha1file *f, void *in, unsigned int size) @@ -1254,14 +1322,14 @@ static struct object_entry *append_obj_to_pack(struct sha1file *f, static int delta_pos_compare(const void *_a, const void *_b) { - struct delta_entry *a = *(struct delta_entry **)_a; - struct delta_entry *b = *(struct delta_entry **)_b; + struct ref_delta_entry *a = *(struct ref_delta_entry **)_a; + struct ref_delta_entry *b = *(struct ref_delta_entry **)_b; return a->obj_no - b->obj_no; } static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) { - struct delta_entry **sorted_by_pos; + struct ref_delta_entry **sorted_by_pos; int i, n = 0; /* @@ -1275,28 +1343,25 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) * resolving deltas in the same order as their position in the pack. 
*/ sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos)); - for (i = 0; i < nr_deltas; i++) { - if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA) - continue; - sorted_by_pos[n++] = &deltas[i]; - } + for (i = 0; i < nr_ref_deltas; i++) + sorted_by_pos[n++] = &ref_deltas[i]; qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare); for (i = 0; i < n; i++) { - struct delta_entry *d = sorted_by_pos[i]; + struct ref_delta_entry *d = sorted_by_pos[i]; enum object_type type; struct base_data *base_obj = alloc_base_data(); if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; - base_obj->data = read_sha1_file(d->base.sha1, &type, &base_obj->size); + base_obj->data = read_sha1_file(d->sha1, &type, &base_obj->size); if (!base_obj->data) continue; - if (check_sha1_signature(d->base.sha1, base_obj->data, + if (check_sha1_signature(d->sha1, base_obj->data, base_obj->size, typename(type))) - die(_("local object %s is corrupt"), sha1_to_hex(d->base.sha1)); - base_obj->obj = append_obj_to_pack(f, d->base.sha1, + die(_("local object %s is corrupt"), sha1_to_hex(d->sha1)); + base_obj->obj = append_obj_to_pack(f, d->sha1, base_obj->data, base_obj->size, type); find_unresolved_deltas(base_obj); display_progress(progress, nr_resolved_deltas); @@ -1488,7 +1553,7 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) static void show_pack_info(int stat_only) { - int i, baseobjects = nr_objects - nr_deltas; + int i, baseobjects = nr_objects - nr_ref_deltas - nr_ofs_deltas; unsigned long *chain_histogram = NULL; if (deepest_delta) @@ -1498,7 +1563,7 @@ static void show_pack_info(int stat_only) struct object_entry *obj = &objects[i]; if (is_delta_type(obj->type)) - chain_histogram[obj->delta_depth - 1]++; + chain_histogram[obj_stat[i].delta_depth - 1]++; if (stat_only) continue; printf("%s %-6s %lu %lu %"PRIuMAX, @@ -1507,8 +1572,8 @@ static void show_pack_info(int stat_only) (unsigned long)(obj[1].idx.offset - 
obj->idx.offset), (uintmax_t)obj->idx.offset); if (is_delta_type(obj->type)) { - struct object_entry *bobj = &objects[obj->base_object_no]; - printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1)); + struct object_entry *bobj = &objects[obj_stat[i].base_object_no]; + printf(" %u %s", obj_stat[i].delta_depth, sha1_to_hex(bobj->idx.sha1)); } putchar('\n'); } @@ -1671,11 +1736,14 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) curr_pack = open_pack_file(pack_name); parse_pack_header(); objects = xcalloc(nr_objects + 1, sizeof(struct object_entry)); - deltas = xcalloc(nr_objects, sizeof(struct delta_entry)); + if (show_stat) + obj_stat = xcalloc(nr_objects + 1, sizeof(struct object_stat)); + ofs_deltas = xcalloc(nr_objects, sizeof(struct ofs_delta_entry)); parse_pack_objects(pack_sha1); resolve_deltas(); conclude_pack(fix_thin_pack, curr_pack, pack_sha1); - free(deltas); + free(ofs_deltas); + free(ref_deltas); if (strict) foreign_nr = check_objects(); |