From 55e1805dffeb5d2a8ccd717b2d07ca8887436a69 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 15:42:01 -0700 Subject: verify-pack: check integrity in a saner order. Check internal integrity to report corrupt pack or idx, and then check cross-integrity between idx and pack. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- pack-check.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pack-check.c b/pack-check.c index 84ed90d369..e57587909e 100644 --- a/pack-check.c +++ b/pack-check.c @@ -29,12 +29,12 @@ static int verify_packfile(struct packed_git *p) pack_base = p->pack_base; SHA1_Update(&ctx, pack_base, pack_size - 20); SHA1_Final(sha1, &ctx); - if (memcmp(sha1, index_base + index_size - 40, 20)) - return error("Packfile %s SHA1 mismatch with idx", - p->pack_name); if (memcmp(sha1, pack_base + pack_size - 20, 20)) return error("Packfile %s SHA1 mismatch with itself", p->pack_name); + if (memcmp(sha1, index_base + index_size - 40, 20)) + return error("Packfile %s SHA1 mismatch with idx", + p->pack_name); /* Make sure everything reachable from idx is valid. Since we * have verified that nr_objects matches between idx and pack, -- cgit v1.2.3 From 9a8b6a0a9d4de3d9c1005b872b9b57a213d3e9f8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 19:31:46 -0700 Subject: pack-objects: update size heuristucs. We used to omit delta base candidates that is much bigger than the target, but delta size does not grow when we delete more, so that was not a very good heuristics. Signed-off-by: Junio C Hamano --- pack-objects.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index c0acc460bb..6604338131 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -1032,12 +1032,6 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de max_depth -= cur_entry->delta_limit; } - size = cur_entry->size; - oldsize = old_entry->size; - sizediff = oldsize > size ? oldsize - size : size - oldsize; - - if (size < 50) - return -1; if (old_entry->depth >= max_depth) return 0; @@ -1048,9 +1042,12 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de * more space-efficient (deletes don't have to say _what_ they * delete). */ + size = cur_entry->size; max_size = size / 2 - 20; if (cur_entry->delta) max_size = cur_entry->delta_size-1; + oldsize = old_entry->size; + sizediff = oldsize < size ? size - oldsize : 0; if (sizediff >= max_size) return 0; delta_buf = diff_delta(old->data, oldsize, @@ -1109,6 +1106,9 @@ static void find_deltas(struct object_entry **list, int window, int depth) */ continue; + if (entry->size < 50) + continue; + free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); -- cgit v1.2.3 From c74320872b445104fe5c265e60785d9d26d94cc5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 15:37:18 -0700 Subject: built-in count-objects. Also it learned to do -v (verbose) to report: - number of loose objects - disk occupied by loose objects - number of objects in local packs - number of loose objects that are also in pack - unrecognised garbage in .git/objects/??/. Signed-off-by: Junio C Hamano --- Makefile | 2 +- builtin-count.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ builtin.h | 1 + git.c | 1 + 4 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 builtin-count.c diff --git a/Makefile b/Makefile index 8ce27a65fb..14193aa1e2 100644 --- a/Makefile +++ b/Makefile @@ -214,7 +214,7 @@ LIB_OBJS = \ $(DIFF_OBJS) BUILTIN_OBJS = \ - builtin-log.o builtin-help.o + builtin-log.o builtin-help.o builtin-count.o GITLIBS = $(LIB_FILE) $(XDIFF_LIB) LIBS = $(GITLIBS) -lz diff --git a/builtin-count.c b/builtin-count.c new file mode 100644 index 0000000000..0256369d5b --- /dev/null +++ b/builtin-count.c @@ -0,0 +1,123 @@ +/* + * Builtin "git count-objects". + * + * Copyright (c) 2006 Junio C Hamano + */ + +#include "cache.h" +#include "builtin.h" + +static const char count_objects_usage[] = "git-count-objects [-v]"; + +static void count_objects(DIR *d, char *path, int len, int verbose, + unsigned long *loose, + unsigned long *loose_size, + unsigned long *packed_loose, + unsigned long *garbage) +{ + struct dirent *ent; + while ((ent = readdir(d)) != NULL) { + char hex[41]; + unsigned char sha1[20]; + const char *cp; + int bad = 0; + + if ((ent->d_name[0] == '.') && + (ent->d_name[1] == 0 || + ((ent->d_name[1] == '.') && (ent->d_name[2] == 0)))) + continue; + for (cp = ent->d_name; *cp; cp++) { + int ch = *cp; + if (('0' <= ch && ch <= '9') || + ('a' <= ch && ch <= 'f')) + continue; + bad = 1; + break; + } + if (cp - ent->d_name != 38) + bad = 1; + else { + struct stat st; + memcpy(path + len + 3, ent->d_name, 38); + path[len + 2] = '/'; + path[len + 41] = 0; + if (lstat(path, &st) || !S_ISREG(st.st_mode)) + bad = 1; + else + (*loose_size) += st.st_blocks; + } + if (bad) { + if (verbose) { + error("garbage found: %.*s/%s", + len + 2, path, ent->d_name); + (*garbage)++; + } + continue; + } + (*loose)++; + if (!verbose) + continue; + memcpy(hex, path+len, 2); + memcpy(hex+2, ent->d_name, 38); + hex[40] = 0; + if (get_sha1_hex(hex, sha1)) + die("internal error"); + if (has_sha1_pack(sha1)) + (*packed_loose)++; + } +} + +int cmd_count_objects(int ac, const char **av, char **ep) +{ + int i; + int verbose = 0; + const char *objdir = get_object_directory(); + int len = strlen(objdir); + char *path = xmalloc(len + 50); + unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0; + unsigned long loose_size = 0; + + for (i = 1; i < ac; i++) { + const char *arg = av[i]; + if (*arg != '-') + break; + else if (!strcmp(arg, "-v")) + verbose = 1; + else + usage(count_objects_usage); + } + + /* we do not take arguments other than flags for now */ + if (i < ac) + usage(count_objects_usage); + memcpy(path, objdir, len); + if (len && objdir[len-1] != '/') + path[len++] = '/'; + for (i = 0; i < 256; i++) { + DIR *d; + sprintf(path + len, "%02x", i); + d = opendir(path); + if (!d) + continue; + count_objects(d, path, len, verbose, + &loose, &loose_size, &packed_loose, &garbage); + closedir(d); + } + if (verbose) { + struct packed_git *p; + for (p = packed_git; p; p = p->next) { + if (!p->pack_local) + continue; + packed += num_packed_objects(p); + } + printf("count: %lu\n", loose); + printf("size: %lu\n", loose_size / 2); + printf("in-pack: %lu\n", packed); + printf("prune-packable: %lu\n", packed_loose); + printf("garbage: %lu\n", garbage); + } + else + printf("%lu objects, %lu kilobytes\n", + loose, loose_size / 2); + return 0; +} diff --git a/builtin.h b/builtin.h index 47408a0585..76169e3f05 100644 --- a/builtin.h +++ b/builtin.h @@ -19,5 +19,6 @@ extern int cmd_version(int argc, const char **argv, char **envp); extern int cmd_whatchanged(int argc, const char **argv, char **envp); extern int cmd_show(int argc, const char **argv, char **envp); extern int cmd_log(int argc, const char **argv, char **envp); +extern int cmd_count_objects(int argc, const char **argv, char **envp); #endif diff --git a/git.c b/git.c index 01b7e28b8c..00fb399725 100644 --- a/git.c +++ b/git.c @@ -46,6 +46,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "log", cmd_log }, { "whatchanged", cmd_whatchanged }, { "show", cmd_show }, + { "count-objects", cmd_count_objects }, }; int i; -- cgit v1.2.3