From 7218a215efc7ae46f7ca8d82442f354e7ac06262 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 2 Feb 2011 10:06:51 -0800 Subject: index-pack: group the delta-base array entries also by type Entries in the delta_base array are only grouped by the bytepattern in the delta_base union, some of which have 20-byte object name of the base object (i.e. base for REF_DELTA objects), while others have sizeof(off_t) bytes followed by enough NULs to fill 20-byte. The loops to iterate through a range inside this array still needs to inspect the type of the delta, and skip over false hits. Group the entries also by type to eliminate the potential of false hits. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 61 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 21 deletions(-) (limited to 'builtin') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 8dc5c0b541..1b5d83afef 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -391,7 +391,18 @@ static void *get_data_from_pack(struct object_entry *obj) return data; } -static int find_delta(const union delta_base *base) +static int compare_delta_bases(const union delta_base *base1, + const union delta_base *base2, + enum object_type type1, + enum object_type type2) +{ + int cmp = type1 - type2; + if (cmp) + return cmp; + return memcmp(base1, base2, UNION_BASE_SZ); +} + +static int find_delta(const union delta_base *base, enum object_type type) { int first = 0, last = nr_deltas; @@ -400,7 +411,8 @@ static int find_delta(const union delta_base *base) struct delta_entry *delta = &deltas[next]; int cmp; - cmp = memcmp(base, &delta->base, UNION_BASE_SZ); + cmp = compare_delta_bases(base, &delta->base, + type, objects[delta->obj_no].type); if (!cmp) return next; if (cmp < 0) { @@ -413,9 +425,10 @@ static int find_delta(const union delta_base *base) } static void find_delta_children(const union delta_base *base, - int *first_index, int *last_index) + int *first_index, int *last_index, + enum object_type type) { - int first = find_delta(base); + int first = find_delta(base, type); int last = first; int end = nr_deltas - 1; @@ -543,11 +556,13 @@ static void find_unresolved_deltas(struct base_data *base, union delta_base base_spec; hashcpy(base_spec.sha1, base->obj->idx.sha1); - find_delta_children(&base_spec, &ref_first, &ref_last); + find_delta_children(&base_spec, + &ref_first, &ref_last, OBJ_REF_DELTA); memset(&base_spec, 0, sizeof(base_spec)); base_spec.offset = base->obj->idx.offset; - find_delta_children(&base_spec, &ofs_first, &ofs_last); + find_delta_children(&base_spec, + &ofs_first, &ofs_last, OBJ_OFS_DELTA); } if (ref_last == -1 && ofs_last == -1) { @@ -559,24 +574,24 @@ static void find_unresolved_deltas(struct base_data *base, for (i = ref_first; i <= ref_last; i++) { struct object_entry *child = objects + deltas[i].obj_no; - if (child->real_type == OBJ_REF_DELTA) { - struct base_data result; - resolve_delta(child, base, &result); - if (i == ref_last && ofs_last == -1) - free_base_data(base); - find_unresolved_deltas(&result, base); - } + struct base_data result; + + assert(child->real_type == OBJ_REF_DELTA); + resolve_delta(child, base, &result); + if (i == ref_last && ofs_last == -1) + free_base_data(base); + find_unresolved_deltas(&result, base); } for (i = ofs_first; i <= ofs_last; i++) { struct object_entry *child = objects + deltas[i].obj_no; - if (child->real_type == OBJ_OFS_DELTA) { - struct base_data result; - resolve_delta(child, base, &result); - if (i == ofs_last) - free_base_data(base); - find_unresolved_deltas(&result, base); - } + struct base_data result; + + assert(child->real_type == OBJ_OFS_DELTA); + resolve_delta(child, base, &result); + if (i == ofs_last) + free_base_data(base); + find_unresolved_deltas(&result, base); } unlink_base_data(base); @@ -586,7 +601,11 @@ static int compare_delta_entry(const void *a, const void *b) { const struct delta_entry *delta_a = a; const struct delta_entry *delta_b = b; - return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ); + + /* group by type (ref vs ofs) and then by value (sha-1 or offset) */ + return compare_delta_bases(&delta_a->base, &delta_b->base, + objects[delta_a->obj_no].type, + objects[delta_b->obj_no].type); } /* Parse all objects and return the pack content SHA1 hash */ -- cgit v1.2.3 From ebcfb3791a53e0455bf8361046e3310993697a8e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 25 Feb 2011 15:43:25 -0800 Subject: write_idx_file: introduce a struct to hold idx customization options Remove two globals, pack_idx_default version and pack_idx_off32_limit, and place them in a pack_idx_option structure. Allow callers to pass it to write_idx_file() as a parameter. Adjust all callers to the API change. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 23 +++++++++++++---------- builtin/pack-objects.c | 20 +++++++++++--------- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'builtin') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 1b5d83afef..4df681885e 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -880,11 +880,12 @@ static void final(const char *final_pack_name, const char *curr_pack_name, static int git_index_pack_config(const char *k, const char *v, void *cb) { + struct pack_idx_option *opts = cb; + if (!strcmp(k, "pack.indexversion")) { - pack_idx_default_version = git_config_int(k, v); - if (pack_idx_default_version > 2) - die("bad pack.indexversion=%"PRIu32, - pack_idx_default_version); + opts->version = git_config_int(k, v); + if (opts->version > 2) + die("bad pack.indexversion=%"PRIu32, opts->version); return 0; } return git_default_config(k, v, cb); @@ -898,6 +899,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) const char *keep_name = NULL, *keep_msg = NULL; char *index_name_buf = NULL, *keep_name_buf = NULL; struct pack_idx_entry **idx_objects; + struct pack_idx_option opts; unsigned char pack_sha1[20]; if (argc == 2 && !strcmp(argv[1], "-h")) @@ -905,7 +907,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) read_replace_refs = 0; - git_config(git_index_pack_config, NULL); + reset_pack_idx_option(&opts); + git_config(git_index_pack_config, &opts); if (prefix && chdir(prefix)) die("Cannot come back to cwd"); @@ -944,12 +947,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) index_name = argv[++i]; } else if (!prefixcmp(arg, "--index-version=")) { char *c; - pack_idx_default_version = strtoul(arg + 16, &c, 10); - if (pack_idx_default_version > 2) + opts.version = strtoul(arg + 16, &c, 10); + if (opts.version > 2) die("bad %s", arg); if (*c == ',') - pack_idx_off32_limit = strtoul(c+1, &c, 0); - if (*c || pack_idx_off32_limit & 0x80000000) + opts.off32_limit = strtoul(c+1, &c, 0); + if (*c || opts.off32_limit & 0x80000000) die("bad %s", arg); } else usage(index_pack_usage); @@ -1032,7 +1035,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *)); for (i = 0; i < nr_objects; i++) idx_objects[i] = &objects[i].idx; - curr_index = write_idx_file(index_name, idx_objects, nr_objects, pack_sha1); + curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1); free(idx_objects); final(pack_name, curr_pack, diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b0503b202a..dc471b78c4 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -70,6 +70,7 @@ static int local; static int incremental; static int ignore_packed_keep; static int allow_ofs_delta; +static struct pack_idx_option pack_idx_opts; static const char *base_name; static int progress = 1; static int window = 10; @@ -493,8 +494,8 @@ static void write_pack_file(void) const char *idx_tmp_name; char tmpname[PATH_MAX]; - idx_tmp_name = write_idx_file(NULL, written_list, - nr_written, sha1); + idx_tmp_name = write_idx_file(NULL, written_list, nr_written, + &pack_idx_opts, sha1); snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", base_name, sha1_to_hex(sha1)); @@ -1880,10 +1881,10 @@ static int git_pack_config(const char *k, const char *v, void *cb) return 0; } if (!strcmp(k, "pack.indexversion")) { - pack_idx_default_version = git_config_int(k, v); - if (pack_idx_default_version > 2) + pack_idx_opts.version = git_config_int(k, v); + if (pack_idx_opts.version > 2) die("bad pack.indexversion=%"PRIu32, - pack_idx_default_version); + pack_idx_opts.version); return 0; } if (!strcmp(k, "pack.packsizelimit")) { @@ -2130,6 +2131,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) rp_av[1] = "--objects"; /* --thin will make it --objects-edge */ rp_ac = 2; + reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); if (!pack_compression_seen && core_compression_seen) pack_compression_level = core_compression_level; @@ -2274,12 +2276,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) } if (!prefixcmp(arg, "--index-version=")) { char *c; - pack_idx_default_version = strtoul(arg + 16, &c, 10); - if (pack_idx_default_version > 2) + pack_idx_opts.version = strtoul(arg + 16, &c, 10); + if (pack_idx_opts.version > 2) die("bad %s", arg); if (*c == ',') - pack_idx_off32_limit = strtoul(c+1, &c, 0); - if (*c || pack_idx_off32_limit & 0x80000000) + pack_idx_opts.off32_limit = strtoul(c+1, &c, 0); + if (*c || pack_idx_opts.off32_limit & 0x80000000) die("bad %s", arg); continue; } -- cgit v1.2.3 From e337a04de298f8c3e64ee1a187423203406b9bae Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 2 Feb 2011 17:29:01 -0800 Subject: index-pack: --verify Given an existing .pack file and the .idx file that describes it, this new mode of operation reads and re-index the packfile and makes sure the existing .idx file matches the result byte-for-byte. All the objects in the .pack file are validated during this operation as well. Unlike verify-pack, which visits each object described in the .idx file in the SHA-1 order, index-pack efficiently exploits the delta-chain to avoid rebuilding the objects that are used as the base of deltified objects over and over again while validating the objects, resulting in much quicker verification of the .pack file and its .idx file. This version however cannot verify a .pack/.idx pair with a handcrafted v2 index that uses 64-bit offset representation for offsets that would fit within 31-bit. You can create such an .idx file by giving a custom offset to --index-version option to the command. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) (limited to 'builtin') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 4df681885e..24a9a16220 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -11,7 +11,7 @@ #include "exec_cmd.h" static const char index_pack_usage[] = -"git index-pack [-v] [-o ] [ --keep | --keep= ] [--strict] ( | --stdin [--fix-thin] [])"; +"git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; struct object_entry { @@ -891,9 +891,32 @@ static int git_index_pack_config(const char *k, const char *v, void *cb) return git_default_config(k, v, cb); } +static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) +{ + struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); + + if (!p) + die("Cannot open existing pack file '%s'", pack_name); + if (open_pack_index(p)) + die("Cannot open existing pack idx file for '%s'", pack_name); + + /* Read the attributes from the existing idx file */ + opts->version = p->index_version; + + /* + * Get rid of the idx file as we do not need it anymore. + * NEEDSWORK: extract this bit from free_pack_by_name() in + * sha1_file.c, perhaps? It shouldn't matter very much as we + * know we haven't installed this pack (hence we never have + * read anything from it). + */ + close_pack_index(p); + free(p); +} + int cmd_index_pack(int argc, const char **argv, const char *prefix) { - int i, fix_thin_pack = 0; + int i, fix_thin_pack = 0, verify = 0; const char *curr_pack, *curr_index; const char *index_name = NULL, *pack_name = NULL; const char *keep_name = NULL, *keep_msg = NULL; @@ -922,6 +945,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) fix_thin_pack = 1; } else if (!strcmp(arg, "--strict")) { strict = 1; + } else if (!strcmp(arg, "--verify")) { + verify = 1; } else if (!strcmp(arg, "--keep")) { keep_msg = ""; } else if (!prefixcmp(arg, "--keep=")) { @@ -988,6 +1013,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) strcpy(keep_name_buf + len - 5, ".keep"); keep_name = keep_name_buf; } + if (verify) { + if (!index_name) + die("--verify with no packfile name given"); + read_idx_option(&opts, index_name); + opts.flags |= WRITE_IDX_VERIFY; + } curr_pack = open_pack_file(pack_name); parse_pack_header(); @@ -1038,10 +1069,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1); free(idx_objects); - final(pack_name, curr_pack, - index_name, curr_index, - keep_name, keep_msg, - pack_sha1); + if (!verify) + final(pack_name, curr_pack, + index_name, curr_index, + keep_name, keep_msg, + pack_sha1); + else + close(input_fd); free(objects); free(index_name_buf); free(keep_name_buf); -- cgit v1.2.3 From 3c9fc074c220d5d1d2173c84cc6ae57d750e2a2c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 25 Feb 2011 16:55:26 -0800 Subject: index-pack --verify: read anomalous offsets from v2 idx file A pack v2 .idx file usually records offset using 64-bit representation only when the offset does not fit within 31-bit, but you can handcraft your .idx file to record smaller offset using 64-bit, storing all zero in the upper 4-byte. By inspecting the original idx file when running index-pack --verify, encode such low offsets that do not need to be in 64-bit but are encoded using 64-bit just like the original idx file so that we can still validate the pack/idx pair by comparing the idx file recomputed with the original. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'builtin') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 24a9a16220..513fbbc55f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -891,6 +891,51 @@ static int git_index_pack_config(const char *k, const char *v, void *cb) return git_default_config(k, v, cb); } +static int cmp_uint32(const void *a_, const void *b_) +{ + uint32_t a = *((uint32_t *)a_); + uint32_t b = *((uint32_t *)b_); + + return (a < b) ? -1 : (a != b); +} + +static void read_v2_anomalous_offsets(struct packed_git *p, + struct pack_idx_option *opts) +{ + const uint32_t *idx1, *idx2; + uint32_t i; + + /* The address of the 4-byte offset table */ + idx1 = (((const uint32_t *)p->index_data) + + 2 /* 8-byte header */ + + 256 /* fan out */ + + 5 * p->num_objects /* 20-byte SHA-1 table */ + + p->num_objects /* CRC32 table */ + ); + + /* The address of the 8-byte offset table */ + idx2 = idx1 + p->num_objects; + + for (i = 0; i < p->num_objects; i++) { + uint32_t off = ntohl(idx1[i]); + if (!(off & 0x80000000)) + continue; + off = off & 0x7fffffff; + if (idx2[off * 2]) + continue; + /* + * The real offset is ntohl(idx2[off * 2]) in high 4 + * octets, and ntohl(idx2[off * 2 + 1]) in low 4 + * octets. But idx2[off * 2] is Zero!!! + */ + ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc); + opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]); + } + + if (1 < opts->anomaly_nr) + qsort(opts->anomaly, opts->anomaly_nr, sizeof(uint32_t), cmp_uint32); +} + static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) { struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); @@ -903,6 +948,9 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) /* Read the attributes from the existing idx file */ opts->version = p->index_version; + if (opts->version == 2) + read_v2_anomalous_offsets(p, opts); + /* * Get rid of the idx file as we do not need it anymore. * NEEDSWORK: extract this bit from free_pack_by_name() in -- cgit v1.2.3 From 4f8ec74efa9fc69aa3b0bd52affe31ca09f2fdd3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 3 Jun 2011 15:32:14 -0700 Subject: index-pack: a miniscule refactor Introduce a helper function that takes the type of an object and tell if it is a delta, as we seem to use this check in many places. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 513fbbc55f..0216af76af 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -498,12 +498,17 @@ static void sha1_object(const void *data, unsigned long size, } } +static int is_delta_type(enum object_type type) +{ + return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA); +} + static void *get_base_data(struct base_data *c) { if (!c->data) { struct object_entry *obj = c->obj; - if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + if (is_delta_type(obj->type)) { void *base = get_base_data(c->base); void *raw = get_data_from_pack(obj); c->data = patch_delta( @@ -629,7 +634,7 @@ static void parse_pack_objects(unsigned char *sha1) struct object_entry *obj = &objects[i]; void *data = unpack_raw_entry(obj, &delta->base); obj->real_type = obj->type; - if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + if (is_delta_type(obj->type)) { nr_deltas++; delta->obj_no = i; delta++; @@ -676,7 +681,7 @@ static void parse_pack_objects(unsigned char *sha1) struct object_entry *obj = &objects[i]; struct base_data base_obj; - if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) + if (is_delta_type(obj->type)) continue; base_obj.obj = obj; base_obj.data = NULL; -- cgit v1.2.3 From 38a4556198de17156f27f0463161a4d1b5e11555 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 3 Jun 2011 15:32:15 -0700 Subject: index-pack: start learning to emulate "verify-pack -v" The "index-pack" machinery already has almost enough knowledge to produce the same output as "verify-pack -v". Fill small gaps in its bookkeeping, and teach it to show what it knows. Add a few more command line options that do not have to be advertised to the end users. They will be used internally when verify-pack calls this. The eventual goal is to remove verify-pack implementation and redo it as a thin wrapper around the index-pack, so that we can remove the rather expensive packed_object_info_detail() API. This still does not do the delta-chain-depth histogram yet but that part is easy. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 0216af76af..aa3c9c6e11 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -20,6 +20,8 @@ struct object_entry unsigned int hdr_size; enum object_type type; enum object_type real_type; + unsigned delta_depth; + int base_object_no; }; union delta_base { @@ -535,6 +537,8 @@ static void resolve_delta(struct object_entry *delta_obj, void *base_data, *delta_data; delta_obj->real_type = base->obj->real_type; + delta_obj->delta_depth = base->obj->delta_depth + 1; + delta_obj->base_object_no = base->obj - objects; delta_data = get_data_from_pack(delta_obj); base_data = get_base_data(base); result->obj = delta_obj; @@ -967,9 +971,32 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) free(p); } +static void show_pack_info(int stat_only) +{ + int i; + for (i = 0; i < nr_objects; i++) { + struct object_entry *obj = &objects[i]; + + /* NEEDSWORK: Compute data necessary for the "histogram" here */ + + if (stat_only) + continue; + printf("%s %-6s %lu %lu %"PRIuMAX, + sha1_to_hex(obj->idx.sha1), + typename(obj->real_type), obj->size, + (unsigned long)(obj[1].idx.offset - obj->idx.offset), + (uintmax_t)obj->idx.offset); + if (is_delta_type(obj->type)) { + struct object_entry *bobj = &objects[obj->base_object_no]; + printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1)); + } + putchar('\n'); + } +} + int cmd_index_pack(int argc, const char **argv, const char *prefix) { - int i, fix_thin_pack = 0, verify = 0; + int i, fix_thin_pack = 0, verify = 0, stat_only = 0, stat = 0; const char *curr_pack, *curr_index; const char *index_name = NULL, *pack_name = NULL; const char *keep_name = NULL, *keep_msg = NULL; @@ -1000,6 +1027,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) strict = 1; } else if (!strcmp(arg, "--verify")) { verify = 1; + } else if (!strcmp(arg, "--verify-stat")) { + verify = 1; + stat = 1; + } else if (!strcmp(arg, "--verify-stat-only")) { + verify = 1; + stat = 1; + stat_only = 1; } else if (!strcmp(arg, "--keep")) { keep_msg = ""; } else if (!prefixcmp(arg, "--keep=")) { @@ -1075,8 +1109,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) curr_pack = open_pack_file(pack_name); parse_pack_header(); - objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry)); - deltas = xmalloc(nr_objects * sizeof(struct delta_entry)); + objects = xcalloc(nr_objects + 1, sizeof(struct object_entry)); + deltas = xcalloc(nr_objects, sizeof(struct delta_entry)); parse_pack_objects(pack_sha1); if (nr_deltas == nr_resolved_deltas) { stop_progress(&progress); @@ -1116,6 +1150,9 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (strict) check_objects(); + if (stat) + show_pack_info(stat_only); + idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *)); for (i = 0; i < nr_objects; i++) idx_objects[i] = &objects[i].idx; -- cgit v1.2.3 From d1a0ed187cbea2941a5cc10dcc43f3a7052ce32d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 3 Jun 2011 15:32:16 -0700 Subject: index-pack: show histogram when emulating "verify-pack -v" The histogram produced by "verify-pack -v" always had an artificial limit of 50, but index-pack knows what the maximum delta depth is, so we do not have to limit it. Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index aa3c9c6e11..ed4c3bb13b 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -70,6 +70,7 @@ static struct progress *progress; static unsigned char input_buffer[4096]; static unsigned int input_offset, input_len; static off_t consumed_bytes; +static unsigned deepest_delta; static git_SHA_CTX input_ctx; static uint32_t input_crc32; static int input_fd, output_fd, pack_fd; @@ -538,6 +539,8 @@ static void resolve_delta(struct object_entry *delta_obj, delta_obj->real_type = base->obj->real_type; delta_obj->delta_depth = base->obj->delta_depth + 1; + if (deepest_delta < delta_obj->delta_depth) + deepest_delta = delta_obj->delta_depth; delta_obj->base_object_no = base->obj - objects; delta_data = get_data_from_pack(delta_obj); base_data = get_base_data(base); @@ -973,12 +976,17 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) static void show_pack_info(int stat_only) { - int i; + int i, baseobjects = nr_objects - nr_deltas; + unsigned long *chain_histogram = NULL; + + if (deepest_delta) + chain_histogram = xcalloc(deepest_delta, sizeof(unsigned long)); + for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - /* NEEDSWORK: Compute data necessary for the "histogram" here */ - + if (is_delta_type(obj->type)) + chain_histogram[obj->delta_depth - 1]++; if (stat_only) continue; printf("%s %-6s %lu %lu %"PRIuMAX, @@ -992,6 +1000,18 @@ static void show_pack_info(int stat_only) } putchar('\n'); } + + if (baseobjects) + printf("non delta: %d object%s\n", + baseobjects, baseobjects > 1 ? "s" : ""); + for (i = 0; i < deepest_delta; i++) { + if (!chain_histogram[i]) + continue; + printf("chain length = %d: %lu object%s\n", + i + 1, + chain_histogram[i], + chain_histogram[i] > 1 ? "s" : ""); + } } int cmd_index_pack(int argc, const char **argv, const char *prefix) -- cgit v1.2.3 From 3de89c9d4216d0fdc11bd1141c419ac4d0d35fed Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 3 Jun 2011 15:32:17 -0700 Subject: verify-pack: use index-pack --verify This finally gets rid of the inefficient verify-pack implementation that walks objects in the packfile in their object name order and replaces it with a call to index-pack --verify. As a side effect, it also removes packed_object_info_detail() API which is rather expensive. As this changes the way errors are reported (verify-pack used to rely on the usual runtime error detection routine unpack_entry() to diagnose the CRC errors in an entry in the *.idx file; index-pack --verify checks the whole *.idx file in one go), update a test that expected the string "CRC" to appear in the error message. Signed-off-by: Junio C Hamano --- builtin/verify-pack.c | 132 ++++++++++---------------------------------------- 1 file changed, 25 insertions(+), 107 deletions(-) (limited to 'builtin') diff --git a/builtin/verify-pack.c b/builtin/verify-pack.c index b6079ae6cb..e841b4a38d 100644 --- a/builtin/verify-pack.c +++ b/builtin/verify-pack.c @@ -1,134 +1,53 @@ #include "builtin.h" #include "cache.h" -#include "pack.h" -#include "pack-revindex.h" +#include "run-command.h" #include "parse-options.h" -#define MAX_CHAIN 50 - #define VERIFY_PACK_VERBOSE 01 #define VERIFY_PACK_STAT_ONLY 02 -static void show_pack_info(struct packed_git *p, unsigned int flags) -{ - uint32_t nr_objects, i; - int cnt; - int stat_only = flags & VERIFY_PACK_STAT_ONLY; - unsigned long chain_histogram[MAX_CHAIN+1], baseobjects; - - nr_objects = p->num_objects; - memset(chain_histogram, 0, sizeof(chain_histogram)); - baseobjects = 0; - - for (i = 0; i < nr_objects; i++) { - const unsigned char *sha1; - unsigned char base_sha1[20]; - const char *type; - unsigned long size; - unsigned long store_size; - off_t offset; - unsigned int delta_chain_length; - - sha1 = nth_packed_object_sha1(p, i); - if (!sha1) - die("internal error pack-check nth-packed-object"); - offset = nth_packed_object_offset(p, i); - type = packed_object_info_detail(p, offset, &size, &store_size, - &delta_chain_length, - base_sha1); - if (!stat_only) - printf("%s ", sha1_to_hex(sha1)); - if (!delta_chain_length) { - if (!stat_only) - printf("%-6s %lu %lu %"PRIuMAX"\n", - type, size, store_size, (uintmax_t)offset); - baseobjects++; - } - else { - if (!stat_only) - printf("%-6s %lu %lu %"PRIuMAX" %u %s\n", - type, size, store_size, (uintmax_t)offset, - delta_chain_length, sha1_to_hex(base_sha1)); - if (delta_chain_length <= MAX_CHAIN) - chain_histogram[delta_chain_length]++; - else - chain_histogram[0]++; - } - } - - if (baseobjects) - printf("non delta: %lu object%s\n", - baseobjects, baseobjects > 1 ? "s" : ""); - - for (cnt = 1; cnt <= MAX_CHAIN; cnt++) { - if (!chain_histogram[cnt]) - continue; - printf("chain length = %d: %lu object%s\n", cnt, - chain_histogram[cnt], - chain_histogram[cnt] > 1 ? "s" : ""); - } - if (chain_histogram[0]) - printf("chain length > %d: %lu object%s\n", MAX_CHAIN, - chain_histogram[0], - chain_histogram[0] > 1 ? "s" : ""); -} - static int verify_one_pack(const char *path, unsigned int flags) { - char arg[PATH_MAX]; - int len; + struct child_process index_pack; + const char *argv[] = {"index-pack", NULL, NULL, NULL }; + struct strbuf arg = STRBUF_INIT; int verbose = flags & VERIFY_PACK_VERBOSE; int stat_only = flags & VERIFY_PACK_STAT_ONLY; - struct packed_git *pack; int err; - len = strlcpy(arg, path, PATH_MAX); - if (len >= PATH_MAX) - return error("name too long: %s", path); - - /* - * In addition to "foo.idx" we accept "foo.pack" and "foo"; - * normalize these forms to "foo.idx" for add_packed_git(). - */ - if (has_extension(arg, ".pack")) { - strcpy(arg + len - 5, ".idx"); - len--; - } else if (!has_extension(arg, ".idx")) { - if (len + 4 >= PATH_MAX) - return error("name too long: %s.idx", arg); - strcpy(arg + len, ".idx"); - len += 4; - } + if (stat_only) + argv[1] = "--verify-stat-only"; + else if (verbose) + argv[1] = "--verify-stat"; + else + argv[1] = "--verify"; /* - * add_packed_git() uses our buffer (containing "foo.idx") to - * build the pack filename ("foo.pack"). Make sure it fits. + * In addition to "foo.pack" we accept "foo.idx" and "foo"; + * normalize these forms to "foo.pack" for "index-pack --verify". */ - if (len + 1 >= PATH_MAX) { - arg[len - 4] = '\0'; - return error("name too long: %s.pack", arg); - } - - pack = add_packed_git(arg, len, 1); - if (!pack) - return error("packfile %s not found.", arg); + strbuf_addstr(&arg, path); + if (has_extension(arg.buf, ".idx")) + strbuf_splice(&arg, arg.len - 3, 3, "pack", 4); + else if (!has_extension(arg.buf, ".pack")) + strbuf_add(&arg, ".pack", 5); + argv[2] = arg.buf; - install_packed_git(pack); + memset(&index_pack, 0, sizeof(index_pack)); + index_pack.argv = argv; + index_pack.git_cmd = 1; - if (!stat_only) - err = verify_pack(pack); - else - err = open_pack_index(pack); + err = run_command(&index_pack); if (verbose || stat_only) { if (err) - printf("%s: bad\n", pack->pack_name); + printf("%s: bad\n", arg.buf); else { - show_pack_info(pack, flags); if (!stat_only) - printf("%s: ok\n", pack->pack_name); + printf("%s: ok\n", arg.buf); } } + strbuf_release(&arg); return err; } @@ -159,7 +78,6 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix) for (i = 0; i < argc; i++) { if (verify_one_pack(argv[i], flags)) err = 1; - discard_revindex(); } return err; -- cgit v1.2.3