diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Documentation/git-repo-config.txt | 4 | ||||
-rw-r--r-- | Documentation/git-var.txt | 3 | ||||
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | apply.c | 5 | ||||
-rw-r--r-- | builtin-log.c | 39 | ||||
-rw-r--r-- | builtin.h | 1 | ||||
-rw-r--r-- | cache-tree.c | 503 | ||||
-rw-r--r-- | cache-tree.h | 30 | ||||
-rw-r--r-- | cache.h | 1 | ||||
-rw-r--r-- | checkout-index.c | 1 | ||||
-rw-r--r-- | commit.c | 37 | ||||
-rw-r--r-- | commit.h | 1 | ||||
-rw-r--r-- | delta.h | 75 | ||||
-rw-r--r-- | diff-delta.c | 168 | ||||
-rw-r--r-- | dump-cache-tree.c | 31 | ||||
-rw-r--r-- | fsck-objects.c | 18 | ||||
-rwxr-xr-x | git-cvsserver.perl | 6 | ||||
-rw-r--r-- | git.c | 1 | ||||
-rw-r--r-- | gsimm.c | 94 | ||||
-rw-r--r-- | gsimm.h | 28 | ||||
-rw-r--r-- | log-tree.c | 27 | ||||
-rw-r--r-- | patch-delta.c | 4 | ||||
-rw-r--r-- | rabinpoly.c | 154 | ||||
-rw-r--r-- | rabinpoly.h | 31 | ||||
-rw-r--r-- | read-cache.c | 72 | ||||
-rw-r--r-- | read-tree.c | 2 | ||||
-rw-r--r-- | repo-config.c | 16 | ||||
-rw-r--r-- | test-gsimm.c | 209 | ||||
-rw-r--r-- | update-index.c | 13 | ||||
-rw-r--r-- | write-tree.c | 147 |
31 files changed, 1496 insertions, 236 deletions
diff --git a/.gitignore b/.gitignore index b5959d6311..cdb2d53318 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,8 @@ git-write-tree git-core-*/?* test-date test-delta +test-dump-cache-tree +test-gsimm common-cmds.h *.tar.gz *.dsc diff --git a/Documentation/git-repo-config.txt b/Documentation/git-repo-config.txt index 71f96bdd10..566cfa1836 100644 --- a/Documentation/git-repo-config.txt +++ b/Documentation/git-repo-config.txt @@ -15,6 +15,7 @@ SYNOPSIS 'git-repo-config' [type] --get-all name [value_regex] 'git-repo-config' [type] --unset name [value_regex] 'git-repo-config' [type] --unset-all name [value_regex] +'git-repo-config' -l | --list DESCRIPTION ----------- @@ -64,6 +65,9 @@ OPTIONS --unset-all:: Remove all matching lines from .git/config. +-l, --list:: + List all variables set in .git/config. + EXAMPLE ------- diff --git a/Documentation/git-var.txt b/Documentation/git-var.txt index 379571eef0..a5b1a0dbab 100644 --- a/Documentation/git-var.txt +++ b/Documentation/git-var.txt @@ -19,7 +19,8 @@ OPTIONS -l:: Cause the logical variables to be listed. In addition, all the variables of the git configuration file .git/config are listed - as well. + as well. (However, the configuration variables listing functionality + is deprecated in favor of `git-repo-config -l`.) EXAMPLE -------- @@ -204,7 +204,7 @@ DIFF_OBJS = \ diffcore-delta.o log-tree.o LIB_OBJS = \ - blob.o commit.o connect.o csum-file.o \ + blob.o commit.o connect.o csum-file.o cache-tree.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ @@ -611,6 +611,12 @@ test-date$X: test-date.c date.o ctype.o test-delta$X: test-delta.c diff-delta.o patch-delta.o $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz +test-dump-cache-tree$X: dump-cache-tree.o $(GITLIBS) + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + +test-gsimm$X: test-gsimm.c gsimm.o rabinpoly.o + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ + check: for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done @@ -659,6 +665,7 @@ clean: rm -f *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \ $(LIB_FILE) $(XDIFF_LIB) rm -f $(ALL_PROGRAMS) $(BUILT_INS) git$X + rm -f test-date$X test-delta$X test-gsimm$X rm -f *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags rm -rf $(GIT_TARNAME) rm -f $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz @@ -8,6 +8,7 @@ */ #include <fnmatch.h> #include "cache.h" +#include "cache-tree.h" #include "quote.h" #include "blob.h" @@ -1717,6 +1718,7 @@ static void remove_file(struct patch *patch) if (write_index) { if (remove_file_from_cache(patch->old_name) < 0) die("unable to remove %s from index", patch->old_name); + cache_tree_invalidate_path(active_cache_tree, patch->old_name); } unlink(patch->old_name); } @@ -1813,8 +1815,9 @@ static void create_file(struct patch *patch) if (!mode) mode = S_IFREG | 0644; - create_one_file(path, mode, buf, size); + create_one_file(path, mode, buf, size); add_index_file(path, mode, buf, size); + cache_tree_invalidate_path(active_cache_tree, path); } static void write_out_one_result(struct patch *patch) diff --git a/builtin-log.c b/builtin-log.c index 69f2911cb4..a39aed6d86 100644 --- a/builtin-log.c +++ b/builtin-log.c @@ -9,6 +9,7 @@ #include "diff.h" #include "revision.h" #include "log-tree.h" +#include "builtin.h" static int cmd_log_wc(int argc, const char **argv, char **envp, struct rev_info *rev) @@ -67,3 +68,41 @@ int cmd_log(int argc, const char **argv, char **envp) rev.diffopt.recursive = 1; return cmd_log_wc(argc, argv, envp, &rev); } + +int cmd_format_patch(int argc, const char **argv, char **envp) +{ + struct commit *commit; + struct commit **list = NULL; + struct rev_info rev; + int nr = 0; + + init_revisions(&rev); + rev.commit_format = CMIT_FMT_EMAIL; + rev.verbose_header = 1; + rev.diff = 1; + rev.diffopt.with_raw = 0; + rev.diffopt.with_stat = 1; + rev.combine_merges = 0; + rev.ignore_merges = 1; + rev.diffopt.output_format = DIFF_FORMAT_PATCH; + argc = setup_revisions(argc, argv, &rev, "HEAD"); + + prepare_revision_walk(&rev); + while ((commit = get_revision(&rev)) != NULL) { + nr++; + list = realloc(list, nr * sizeof(list[0])); + list[nr - 1] = commit; + } + while (0 <= --nr) { + int shown; + commit = list[nr]; + shown = log_tree_commit(&rev, commit); + free(commit->buffer); + commit->buffer = NULL; + if (shown) + printf("-- \n%s\n\n", git_version_string); + } + free(list); + return 0; +} + @@ -19,5 +19,6 @@ extern int cmd_version(int argc, const char **argv, char **envp); extern int cmd_whatchanged(int argc, const char **argv, char **envp); extern int cmd_show(int argc, const char **argv, char **envp); extern int cmd_log(int argc, const char **argv, char **envp); +extern int cmd_format_patch(int argc, const char **argv, char **envp); #endif diff --git a/cache-tree.c b/cache-tree.c new file mode 100644 index 0000000000..d8438d67d7 --- /dev/null +++ b/cache-tree.c @@ -0,0 +1,503 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +#define DEBUG 0 + +struct cache_tree *cache_tree(void) +{ + struct cache_tree *it = xcalloc(1, sizeof(struct cache_tree)); + it->entry_count = -1; + return it; +} + +void cache_tree_free(struct cache_tree **it_p) +{ + int i; + struct cache_tree *it = *it_p; + + if (!it) + return; + for (i = 0; i < it->subtree_nr; i++) + if (it->down[i]) + cache_tree_free(&it->down[i]->cache_tree); + free(it->down); + free(it); + *it_p = NULL; +} + +static int subtree_name_cmp(const char *one, int onelen, + const char *two, int twolen) +{ + if (onelen < twolen) + return -1; + if (twolen < onelen) + return 1; + return memcmp(one, two, onelen); +} + +static int subtree_pos(struct cache_tree *it, const char *path, int pathlen) +{ + struct cache_tree_sub **down = it->down; + int lo, hi; + lo = 0; + hi = it->subtree_nr; + while (lo < hi) { + int mi = (lo + hi) / 2; + struct cache_tree_sub *mdl = down[mi]; + int cmp = subtree_name_cmp(path, pathlen, + mdl->name, mdl->namelen); + if (!cmp) + return mi; + if (cmp < 0) + hi = mi; + else + lo = mi + 1; + } + return -lo-1; +} + +static struct cache_tree_sub *find_subtree(struct cache_tree *it, + const char *path, + int pathlen, + int create) +{ + struct cache_tree_sub *down; + int pos = subtree_pos(it, path, pathlen); + if (0 <= pos) + return it->down[pos]; + if (!create) + return NULL; + + pos = -pos-1; + if (it->subtree_alloc <= it->subtree_nr) { + it->subtree_alloc = alloc_nr(it->subtree_alloc); + it->down = xrealloc(it->down, it->subtree_alloc * + sizeof(*it->down)); + } + it->subtree_nr++; + + down = xmalloc(sizeof(*down) + pathlen + 1); + down->cache_tree = NULL; + down->namelen = pathlen; + memcpy(down->name, path, pathlen); + down->name[pathlen] = 0; + + if (pos < it->subtree_nr) + memmove(it->down + pos + 1, + it->down + pos, + sizeof(down) * (it->subtree_nr - pos - 1)); + it->down[pos] = down; + return down; +} + +void cache_tree_invalidate_path(struct cache_tree *it, const char *path) +{ + /* a/b/c + * ==> invalidate self + * ==> find "a", have it invalidate "b/c" + * a + * ==> invalidate self + * ==> if "a" exists as a subtree, remove it. + */ + const char *slash; + int namelen; + struct cache_tree_sub *down; + + if (!it) + return; + slash = strchr(path, '/'); + it->entry_count = -1; + if (!slash) { + int pos; + namelen = strlen(path); + pos = subtree_pos(it, path, namelen); + if (0 <= pos) { + cache_tree_free(&it->down[pos]->cache_tree); + free(it->down[pos]); + /* 0 1 2 3 4 5 + * ^ ^subtree_nr = 6 + * pos + * move 4 and 5 up one place (2 entries) + * 2 = 6 - 3 - 1 = subtree_nr - pos - 1 + */ + memmove(it->down+pos, it->down+pos+1, + sizeof(struct cache_tree_sub *) * + (it->subtree_nr - pos - 1)); + it->subtree_nr--; + } + return; + } + namelen = slash - path; + down = find_subtree(it, path, namelen, 0); + if (down) + cache_tree_invalidate_path(down->cache_tree, slash + 1); +} + +static int verify_cache(struct cache_entry **cache, + int entries) +{ + int i, funny; + + /* Verify that the tree is merged */ + funny = 0; + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + if (ce_stage(ce)) { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "%s: unmerged (%s)\n", + ce->name, sha1_to_hex(ce->sha1)); + } + } + if (funny) + return -1; + + /* Also verify that the cache does not have path and path/file + * at the same time. At this point we know the cache has only + * stage 0 entries. + */ + funny = 0; + for (i = 0; i < entries - 1; i++) { + /* path/file always comes after path because of the way + * the cache is sorted. Also path can appear only once, + * which means conflicting one would immediately follow. + */ + const char *this_name = cache[i]->name; + const char *next_name = cache[i+1]->name; + int this_len = strlen(this_name); + if (this_len < strlen(next_name) && + strncmp(this_name, next_name, this_len) == 0 && + next_name[this_len] == '/') { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "You have both %s and %s\n", + this_name, next_name); + } + } + if (funny) + return -1; + return 0; +} + +static void discard_unused_subtrees(struct cache_tree *it) +{ + struct cache_tree_sub **down = it->down; + int nr = it->subtree_nr; + int dst, src; + for (dst = src = 0; src < nr; src++) { + struct cache_tree_sub *s = down[src]; + if (s->used) + down[dst++] = s; + else { + cache_tree_free(&s->cache_tree); + free(s); + it->subtree_nr--; + } + } +} + +int cache_tree_fully_valid(struct cache_tree *it) +{ + int i; + if (!it) + return 0; + if (it->entry_count < 0 || !has_sha1_file(it->sha1)) + return 0; + for (i = 0; i < it->subtree_nr; i++) { + if (!cache_tree_fully_valid(it->down[i]->cache_tree)) + return 0; + } + return 1; +} + +static int update_one(struct cache_tree *it, + struct cache_entry **cache, + int entries, + const char *base, + int baselen, + int missing_ok) +{ + unsigned long size, offset; + char *buffer; + int i; + + if (0 <= it->entry_count && has_sha1_file(it->sha1)) + return it->entry_count; + + /* + * We first scan for subtrees and update them; we start by + * marking existing subtrees -- the ones that are unmarked + * should not be in the result. + */ + for (i = 0; i < it->subtree_nr; i++) + it->down[i]->used = 0; + + /* + * Find the subtrees and update them. + */ + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, sublen, subcnt; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (!slash) + continue; + /* + * a/bbb/c (base = a/, slash = /c) + * ==> + * path+baselen = bbb/c, sublen = 3 + */ + sublen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, sublen, 1); + if (!sub->cache_tree) + sub->cache_tree = cache_tree(); + subcnt = update_one(sub->cache_tree, + cache + i, entries - i, + path, + baselen + sublen + 1, + missing_ok); + i += subcnt - 1; + sub->used = 1; + } + + discard_unused_subtrees(it); + + /* + * Then write out the tree object for this level. + */ + size = 8192; + buffer = xmalloc(size); + offset = 0; + + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, entlen; + const unsigned char *sha1; + unsigned mode; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (slash) { + entlen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, entlen, 0); + if (!sub) + die("cache-tree.c: '%.*s' in '%s' not found", + entlen, path + baselen, path); + i += sub->cache_tree->entry_count - 1; + sha1 = sub->cache_tree->sha1; + mode = S_IFDIR; + } + else { + sha1 = ce->sha1; + mode = ntohl(ce->ce_mode); + entlen = pathlen - baselen; + } + if (!missing_ok && !has_sha1_file(sha1)) + return error("invalid object %s", sha1_to_hex(sha1)); + + if (!ce->ce_mode) + continue; /* entry being removed */ + + if (size < offset + entlen + 100) { + size = alloc_nr(offset + entlen + 100); + buffer = xrealloc(buffer, size); + } + offset += sprintf(buffer + offset, + "%o %.*s", mode, entlen, path + baselen); + buffer[offset++] = 0; + memcpy(buffer + offset, sha1, 20); + offset += 20; + +#if DEBUG + fprintf(stderr, "cache-tree %o %.*s\n", + mode, entlen, path + baselen); +#endif + } + + write_sha1_file(buffer, offset, tree_type, it->sha1); + free(buffer); + it->entry_count = i; +#if DEBUG + fprintf(stderr, "cache-tree (%d ent, %d subtree) %s\n", + it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); +#endif + return i; +} + +int cache_tree_update(struct cache_tree *it, + struct cache_entry **cache, + int entries, + int missing_ok) +{ + int i; + i = verify_cache(cache, entries); + if (i) + return i; + i = update_one(it, cache, entries, "", 0, missing_ok); + if (i < 0) + return i; + return 0; +} + +static void *write_one(struct cache_tree *it, + char *path, + int pathlen, + char *buffer, + unsigned long *size, + unsigned long *offset) +{ + int i; + + /* One "cache-tree" entry consists of the following: + * path (NUL terminated) + * entry_count, subtree_nr ("%d %d\n") + * tree-sha1 (missing if invalid) + * subtree_nr "cache-tree" entries for subtrees. + */ + if (*size < *offset + pathlen + 100) { + *size = alloc_nr(*offset + pathlen + 100); + buffer = xrealloc(buffer, *size); + } + *offset += sprintf(buffer + *offset, "%.*s%c%d %d\n", + pathlen, path, 0, + it->entry_count, it->subtree_nr); + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n", + pathlen, path, it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n", + pathlen, path, it->subtree_nr); +#endif + + if (0 <= it->entry_count) { + memcpy(buffer + *offset, it->sha1, 20); + *offset += 20; + } + for (i = 0; i < it->subtree_nr; i++) { + struct cache_tree_sub *down = it->down[i]; + if (i) { + struct cache_tree_sub *prev = it->down[i-1]; + if (subtree_name_cmp(down->name, down->namelen, + prev->name, prev->namelen) <= 0) + die("fatal - unsorted cache subtree"); + } + buffer = write_one(down->cache_tree, down->name, down->namelen, + buffer, size, offset); + } + return buffer; +} + +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p) +{ + char path[PATH_MAX]; + unsigned long size = 8192; + char *buffer = xmalloc(size); + + *size_p = 0; + path[0] = 0; + return write_one(root, path, 0, buffer, &size, size_p); +} + +static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) +{ + const char *buf = *buffer; + unsigned long size = *size_p; + struct cache_tree *it; + int i, subtree_nr; + + it = NULL; + /* skip name, but make sure name exists */ + while (size && *buf) { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + it = cache_tree(); + if (sscanf(buf, "%d %d\n", &it->entry_count, &subtree_nr) != 2) + goto free_return; + while (size && *buf && *buf != '\n') { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + if (0 <= it->entry_count) { + if (size < 20) + goto free_return; + memcpy(it->sha1, buf, 20); + buf += 20; + size -= 20; + } + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%s> (%d ent, %d subtree) %s\n", + *buffer, it->entry_count, subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%s> (%d subtrees) invalid\n", + *buffer, subtree_nr); +#endif + + /* + * Just a heuristic -- we do not add directories that often but + * we do not want to have to extend it immediately when we do, + * hence +2. + */ + it->subtree_alloc = subtree_nr + 2; + it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *)); + for (i = 0; i < subtree_nr; i++) { + /* read each subtree */ + struct cache_tree *sub; + struct cache_tree_sub *subtree; + const char *name = buf; + int namelen; + sub = read_one(&buf, &size); + if (!sub) + goto free_return; + namelen = strlen(name); + subtree = find_subtree(it, name, namelen, 1); + subtree->cache_tree = sub; + } + if (subtree_nr != it->subtree_nr) + die("cache-tree: internal error"); + *buffer = buf; + *size_p = size; + return it; + + free_return: + cache_tree_free(&it); + return NULL; +} + +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size) +{ + if (buffer[0]) + return NULL; /* not the whole tree */ + return read_one(&buffer, &size); +} diff --git a/cache-tree.h b/cache-tree.h new file mode 100644 index 0000000000..c70a7699a9 --- /dev/null +++ b/cache-tree.h @@ -0,0 +1,30 @@ +#ifndef CACHE_TREE_H +#define CACHE_TREE_H + +struct cache_tree; +struct cache_tree_sub { + struct cache_tree *cache_tree; + int namelen; + int used; + char name[FLEX_ARRAY]; +}; + +struct cache_tree { + int entry_count; /* negative means "invalid" */ + unsigned char sha1[20]; + int subtree_nr; + int subtree_alloc; + struct cache_tree_sub **down; +}; + +struct cache_tree *cache_tree(void); +void cache_tree_free(struct cache_tree **); +void cache_tree_invalidate_path(struct cache_tree *, const char *); + +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p); +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); + +int cache_tree_fully_valid(struct cache_tree *); +int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); + +#endif @@ -114,6 +114,7 @@ static inline unsigned int create_ce_mode(unsigned int mode) extern struct cache_entry **active_cache; extern unsigned int active_nr, active_alloc, active_cache_changed; +extern struct cache_tree *active_cache_tree; #define GIT_DIR_ENVIRONMENT "GIT_DIR" #define DEFAULT_GIT_DIR_ENVIRONMENT ".git" diff --git a/checkout-index.c b/checkout-index.c index dd6a2d86fe..e56c354f8c 100644 --- a/checkout-index.c +++ b/checkout-index.c @@ -39,6 +39,7 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" +#include "cache-tree.h" #define CHECKOUT_ALL 4 static const char *prefix; @@ -36,6 +36,8 @@ enum cmit_fmt get_commit_format(const char *arg) return CMIT_FMT_FULL; if (!strcmp(arg, "=fuller")) return CMIT_FMT_FULLER; + if (!strcmp(arg, "=email")) + return CMIT_FMT_EMAIL; if (!strcmp(arg, "=oneline")) return CMIT_FMT_ONELINE; die("invalid --pretty format"); @@ -428,6 +430,10 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, const c time = strtoul(date, &date, 10); tz = strtol(date, NULL, 10); + if (fmt == CMIT_FMT_EMAIL) { + what = "From"; + filler = ""; + } ret = sprintf(buf, "%s: %.*s%.*s\n", what, (fmt == CMIT_FMT_FULLER) ? 4 : 0, filler, namelen, line); @@ -435,6 +441,9 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, const c case CMIT_FMT_MEDIUM: ret += sprintf(buf + ret, "Date: %s\n", show_date(time, tz)); break; + case CMIT_FMT_EMAIL: + ret += sprintf(buf + ret, "Date: %s\n", show_date(time, tz)); + break; case CMIT_FMT_FULLER: ret += sprintf(buf + ret, "%sDate: %s\n", what, show_date(time, tz)); break; @@ -445,10 +454,12 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, const c return ret; } -static int is_empty_line(const char *line, int len) +static int is_empty_line(const char *line, int *len_p) { + int len = *len_p; while (len && isspace(line[len-1])) len--; + *len_p = len; return !len; } @@ -457,7 +468,8 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com struct commit_list *parent = commit->parents; int offset; - if ((fmt == CMIT_FMT_ONELINE) || !parent || !parent->next) + if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) || + !parent || !parent->next) return 0; offset = sprintf(buf, "Merge:"); @@ -480,9 +492,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit { int hdr = 1, body = 0; unsigned long offset = 0; - int indent = (fmt == CMIT_FMT_ONELINE) ? 0 : 4; + int indent = 4; int parents_shown = 0; const char *msg = commit->buffer; + const char *subject = NULL; + + if (fmt == CMIT_FMT_EMAIL) + subject = "Subject: [PATCH] "; + if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL) + indent = 0; for (;;) { const char *line = msg; @@ -506,7 +524,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit if (hdr) { if (linelen == 1) { hdr = 0; - if (fmt != CMIT_FMT_ONELINE) + if ((fmt != CMIT_FMT_ONELINE) && !subject) buf[offset++] = '\n'; continue; } @@ -544,20 +562,29 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit continue; } - if (is_empty_line(line, linelen)) { + if (is_empty_line(line, &linelen)) { if (!body) continue; + if (subject) + continue; if (fmt == CMIT_FMT_SHORT) break; } else { body = 1; } + if (subject) { + int slen = strlen(subject); + memcpy(buf + offset, subject, slen); + offset += slen; + } memset(buf + offset, ' ', indent); memcpy(buf + offset + indent, line, linelen); offset += linelen + indent; + buf[offset++] = '\n'; if (fmt == CMIT_FMT_ONELINE) break; + subject = NULL; } while (offset && isspace(buf[offset-1])) offset--; @@ -45,6 +45,7 @@ enum cmit_fmt { CMIT_FMT_FULL, CMIT_FMT_FULLER, CMIT_FMT_ONELINE, + CMIT_FMT_EMAIL, CMIT_FMT_UNSPECIFIED, }; @@ -1,12 +1,73 @@ #ifndef DELTA_H #define DELTA_H -/* handling of delta buffers */ -extern void *diff_delta(void *from_buf, unsigned long from_size, - void *to_buf, unsigned long to_size, - unsigned long *delta_size, unsigned long max_size); -extern void *patch_delta(void *src_buf, unsigned long src_size, - void *delta_buf, unsigned long delta_size, +/* opaque object for delta index */ +struct delta_index; + +/* + * create_delta_index: compute index data from given buffer + * + * This returns a pointer to a struct delta_index that should be passed to + * subsequent create_delta() calls, or to free_delta_index(). A NULL pointer + * is returned on failure. The given buffer must not be freed nor altered + * before free_delta_index() is called. The returned pointer must be freed + * using free_delta_index(). + */ +extern struct delta_index * +create_delta_index(const void *buf, unsigned long bufsize); + +/* + * free_delta_index: free the index created by create_delta_index() + */ +extern void free_delta_index(struct delta_index *index); + +/* + * create_delta: create a delta from given index for the given buffer + * + * This function may be called multiple times with different buffers using + * the same delta_index pointer. If max_delta_size is non-zero and the + * resulting delta is to be larger than max_delta_size then NULL is returned. + * On success, a non-NULL pointer to the buffer with the delta data is + * returned and *delta_size is updated with its size. The returned buffer + * must be freed by the caller. + */ +extern void * +create_delta(const struct delta_index *index, + const void *buf, unsigned long bufsize, + unsigned long *delta_size, unsigned long max_delta_size); + +/* + * diff_delta: create a delta from source buffer to target buffer + * + * If max_delta_size is non-zero and the resulting delta is to be larger + * than max_delta_size then NULL is returned. On success, a non-NULL + * pointer to the buffer with the delta data is returned and *delta_size is + * updated with its size. The returned buffer must be freed by the caller. + */ +static inline void * +diff_delta(const void *src_buf, unsigned long src_bufsize, + const void *trg_buf, unsigned long trg_bufsize, + unsigned long *delta_size, unsigned long max_delta_size) +{ + struct delta_index *index = create_delta_index(src_buf, src_bufsize); + if (index) { + void *delta = create_delta(index, trg_buf, trg_bufsize, + delta_size, max_delta_size); + free_delta_index(index); + return delta; + } + return NULL; +} + +/* + * patch_delta: recreate target buffer given source buffer and delta data + * + * On success, a non-NULL pointer to the target buffer is returned and + * *trg_bufsize is updated with its size. On failure a NULL pointer is + * returned. The returned buffer must be freed by the caller. + */ +extern void *patch_delta(const void *src_buf, unsigned long src_size, + const void *delta_buf, unsigned long delta_size, unsigned long *dst_size); /* the smallest possible delta size is 4 bytes */ @@ -14,7 +75,7 @@ extern void *patch_delta(void *src_buf, unsigned long src_size, /* * This must be called twice on the delta data buffer, first to get the - * expected reference buffer size, and again to get the result buffer size. + * expected source buffer size, and again to get the target buffer size. */ static inline unsigned long get_delta_hdr_size(const unsigned char **datap, const unsigned char *top) diff --git a/diff-delta.c b/diff-delta.c index 1188b31cd0..fdedf94cce 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -27,53 +27,70 @@ /* block size: min = 16, max = 64k, power of 2 */ #define BLK_SIZE 16 -#define MIN(a, b) ((a) < (b) ? (a) : (b)) +/* maximum hash entry list for the same hash bucket */ +#define HASH_LIMIT 64 #define GR_PRIME 0x9e370001 #define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift)) -struct index { +struct index_entry { const unsigned char *ptr; unsigned int val; - struct index *next; + struct index_entry *next; }; -static struct index ** delta_index(const unsigned char *buf, - unsigned long bufsize, - unsigned long trg_bufsize, - unsigned int *hash_shift) +struct delta_index { + const void *src_buf; + unsigned long src_size; + unsigned int hash_shift; + struct index_entry *hash[0]; +}; + +struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) { - unsigned int i, hsize, hshift, hlimit, entries, *hash_count; - const unsigned char *data; - struct index *entry, **hash; + unsigned int i, hsize, hshift, entries, *hash_count; + const unsigned char *data, *buffer = buf; + struct delta_index *index; + struct index_entry *entry, **hash; void *mem; + if (!buf || !bufsize) + return NULL; + /* determine index hash size */ entries = bufsize / BLK_SIZE; hsize = entries / 4; for (i = 4; (1 << i) < hsize && i < 31; i++); hsize = 1 << i; hshift = 32 - i; - *hash_shift = hshift; /* allocate lookup index */ - mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry)); + mem = malloc(sizeof(*index) + + sizeof(*hash) * hsize + + sizeof(*entry) * entries); if (!mem) return NULL; + index = mem; + mem = index + 1; hash = mem; - entry = mem + hsize * sizeof(*hash); + mem = hash + hsize; + entry = mem; + + index->src_buf = buf; + index->src_size = bufsize; + index->hash_shift = hshift; memset(hash, 0, hsize * sizeof(*hash)); /* allocate an array to count hash entries */ hash_count = calloc(hsize, sizeof(*hash_count)); if (!hash_count) { - free(hash); + free(index); return NULL; } /* then populate the index */ - data = buf + entries * BLK_SIZE - BLK_SIZE; - while (data >= buf) { + data = buffer + entries * BLK_SIZE - BLK_SIZE; + while (data >= buffer) { unsigned int val = adler32(0, data, BLK_SIZE); i = HASH(val, hshift); entry->ptr = data; @@ -91,27 +108,18 @@ static struct index ** delta_index(const unsigned char *buf, * bucket that would bring us to O(m*n) computing costs (m and n * corresponding to reference and target buffer sizes). * - * The more the target buffer is large, the more it is important to - * have small entry lists for each hash buckets. With such a limit - * the cost is bounded to something more like O(m+n). - */ - hlimit = (1 << 26) / trg_bufsize; - if (hlimit < 4*BLK_SIZE) - hlimit = 4*BLK_SIZE; - - /* - * Now make sure none of the hash buckets has more entries than + * Make sure none of the hash buckets has more entries than * we're willing to test. Otherwise we cull the entry list * uniformly to still preserve a good repartition across * the reference buffer. */ for (i = 0; i < hsize; i++) { - if (hash_count[i] < hlimit) + if (hash_count[i] < HASH_LIMIT) continue; entry = hash[i]; do { - struct index *keep = entry; - int skip = hash_count[i] / hlimit / 2; + struct index_entry *keep = entry; + int skip = hash_count[i] / HASH_LIMIT / 2; do { entry = entry->next; } while(--skip && entry); @@ -120,7 +128,12 @@ static struct index ** delta_index(const unsigned char *buf, } free(hash_count); - return hash; + return index; +} + +void free_delta_index(struct delta_index *index) +{ + free(index); } /* provide the size of the copy opcode given the block offset and size */ @@ -131,21 +144,17 @@ static struct index ** delta_index(const unsigned char *buf, /* the maximum size for any opcode */ #define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff) -void *diff_delta(void *from_buf, unsigned long from_size, - void *to_buf, unsigned long to_size, - unsigned long *delta_size, - unsigned long max_size) +void * +create_delta(const struct delta_index *index, + const void *trg_buf, unsigned long trg_size, + unsigned long *delta_size, unsigned long max_size) { unsigned int i, outpos, outsize, hash_shift; int inscnt; const unsigned char *ref_data, *ref_top, *data, *top; unsigned char *out; - struct index *entry, **hash; - if (!from_size || !to_size) - return NULL; - hash = delta_index(from_buf, from_size, to_size, &hash_shift); - if (!hash) + if (!trg_buf || !trg_size) return NULL; outpos = 0; @@ -153,60 +162,55 @@ void *diff_delta(void *from_buf, unsigned long from_size, if (max_size && outsize >= max_size) outsize = max_size + MAX_OP_SIZE + 1; out = malloc(outsize); - if (!out) { - free(hash); + if (!out) return NULL; - } - - ref_data = from_buf; - ref_top = from_buf + from_size; - data = to_buf; - top = to_buf + to_size; /* store reference buffer size */ - out[outpos++] = from_size; - from_size >>= 7; - while (from_size) { - out[outpos - 1] |= 0x80; - out[outpos++] = from_size; - from_size >>= 7; + i = index->src_size; + while (i >= 0x80) { + out[outpos++] = i | 0x80; + i >>= 7; } + out[outpos++] = i; /* store target buffer size */ - out[outpos++] = to_size; - to_size >>= 7; - while (to_size) { - out[outpos - 1] |= 0x80; - out[outpos++] = to_size; - to_size >>= 7; + i = trg_size; + while (i >= 0x80) { + out[outpos++] = i | 0x80; + i >>= 7; } + out[outpos++] = i; + ref_data = index->src_buf; + ref_top = ref_data + index->src_size; + data = trg_buf; + top = trg_buf + trg_size; + hash_shift = index->hash_shift; inscnt = 0; while (data < top) { unsigned int moff = 0, msize = 0; - if (data + BLK_SIZE <= top) { - unsigned int val = adler32(0, data, BLK_SIZE); - i = HASH(val, hash_shift); - for (entry = hash[i]; entry; entry = entry->next) { - const unsigned char *ref = entry->ptr; - const unsigned char *src = data; - unsigned int ref_size = ref_top - ref; - if (entry->val != val) - continue; - if (ref_size > top - src) - ref_size = top - src; - if (ref_size > 0x10000) - ref_size = 0x10000; - if (ref_size <= msize) - break; - while (ref_size-- && *src++ == *ref) - ref++; - if (msize < ref - entry->ptr) { - /* this is our best match so far */ - msize = ref - entry->ptr; - moff = entry->ptr - ref_data; - } + struct index_entry *entry; + unsigned int val = adler32(0, data, BLK_SIZE); + i = HASH(val, hash_shift); + for (entry = index->hash[i]; entry; entry = entry->next) { + const unsigned char *ref = entry->ptr; + const unsigned char *src = data; + unsigned int ref_size = ref_top - ref; + if (entry->val != val) + continue; + if (ref_size > top - src) + ref_size = top - src; + if (ref_size > 0x10000) + ref_size = 0x10000; + if (ref_size <= msize) + break; + while (ref_size-- && *src++ == *ref) + ref++; + if (msize < ref - entry->ptr) { + /* this is our best match so far */ + msize = ref - entry->ptr; + moff = entry->ptr - ref_data; } } @@ -271,7 +275,6 @@ void *diff_delta(void *from_buf, unsigned long from_size, out = realloc(out, outsize); if (!out) { free(tmp); - free(hash); return NULL; } } @@ -280,7 +283,6 @@ void *diff_delta(void *from_buf, unsigned long from_size, if (inscnt) out[outpos - inscnt - 1] = inscnt; - free(hash); *delta_size = outpos; return out; } diff --git a/dump-cache-tree.c b/dump-cache-tree.c new file mode 100644 index 0000000000..f6a19bac3d --- /dev/null +++ b/dump-cache-tree.c @@ -0,0 +1,31 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +static void dump_cache_tree(struct cache_tree *it, const char *pfx) +{ + int i; + if (!it) + return; + if (it->entry_count < 0) + printf("%-40s %s (%d subtrees)\n", "invalid", pfx, + it->subtree_nr); + else + printf("%s %s (%d entries, %d subtrees)\n", + sha1_to_hex(it->sha1), + pfx, it->entry_count, it->subtree_nr); + for (i = 0; i < it->subtree_nr; i++) { + char path[PATH_MAX]; + struct cache_tree_sub *down = it->down[i]; + sprintf(path, "%s%.*s/", pfx, down->namelen, down->name); + dump_cache_tree(down->cache_tree, path); + } +} + +int main(int ac, char **av) +{ + if (read_cache() < 0) + die("unable to read index file"); + dump_cache_tree(active_cache_tree, ""); + return 0; +} diff --git a/fsck-objects.c b/fsck-objects.c index 59b25904cb..cc09143a92 100644 --- a/fsck-objects.c +++ b/fsck-objects.c @@ -8,6 +8,7 @@ #include "tag.h" #include "refs.h" #include "pack.h" +#include "cache-tree.h" #define REACHABLE 0x0001 @@ -438,6 +439,21 @@ static int fsck_head_link(void) return 0; } +static int fsck_cache_tree(struct cache_tree *it) +{ + int i; + int err = 0; + + if (0 <= it->entry_count) { + struct object *obj = parse_object(it->sha1); + if (obj->type != tree_type) + err |= objerror(obj, "non-tree in cache-tree"); + } + for (i = 0; i < it->subtree_nr; i++) + err |= fsck_cache_tree(it->down[i]->cache_tree); + return err; +} + int main(int argc, char **argv) { int i, heads; @@ -547,6 +563,8 @@ int main(int argc, char **argv) obj->used = 1; mark_reachable(obj, REACHABLE); } + if (active_cache_tree) + fsck_cache_tree(active_cache_tree); } check_connectivity(); diff --git a/git-cvsserver.perl b/git-cvsserver.perl index 7d3f78e375..0b37d26e07 100755 --- a/git-cvsserver.perl +++ b/git-cvsserver.perl @@ -171,11 +171,11 @@ sub req_Root return 0; } - my @gitvars = `git-var -l`; + my @gitvars = `git-repo-config -l`; if ($?) { - print "E problems executing git-var on the server -- this is not a git repository or the PATH is not set correcly.\n"; + print "E problems executing git-repo-config on the server -- this is not a git repository or the PATH is not set correcly.\n"; print "E \n"; - print "error 1 - problem executing git-var\n"; + print "error 1 - problem executing git-repo-config\n"; return 0; } foreach my $line ( @gitvars ) @@ -47,6 +47,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "log", cmd_log }, { "whatchanged", cmd_whatchanged }, { "show", cmd_show }, + { "fmt-patch", cmd_format_patch }, }; int i; diff --git a/gsimm.c b/gsimm.c new file mode 100644 index 0000000000..7024bf8f58 --- /dev/null +++ b/gsimm.c @@ -0,0 +1,94 @@ +#include "rabinpoly.h" +#include "gsimm.h" + +/* Has to be power of two. Since the Rabin hash only has 63 + usable bits, the number of hashes is limited to 32. + Lower powers of two could be used for speeding up processing + of very large files. */ +#define NUM_HASHES_PER_CHAR 32 + +/* Size of cache used to eliminate duplicate substrings. + Make small enough to comfortably fit in L1 cache. */ +#define DUP_CACHE_SIZE 256 + +/* For the final counting, do not count each bit individually, but + group them. Must be power of two, at most NUM_HASHES_PER_CHAR. + However, larger sizes result in higher cache usage. Use 8 bits + per group for efficient processing of large files on fast machines + with decent caches, or 4 bits for faster processing of small files + and for machines with small caches. */ +#define GROUP_BITS 4 +#define GROUP_COUNTERS (1<<GROUP_BITS) + +static void freq_to_md(u_char *md, int *freq) +{ int j, k; + + for (j = 0; j < MD_LENGTH; j++) + { u_char ch = 0; + + for (k = 0; k < 8; k++) ch = 2*ch + (freq[8*j+k] > 0); + md[j] = ch; + } + bzero (freq, sizeof(freq[0]) * MD_BITS); +} + +void gb_simm_process(u_char *data, unsigned len, u_char *md) +{ size_t j = 0; + u_int32_t ofs; + u_int32_t dup_cache[DUP_CACHE_SIZE]; + u_int32_t count [MD_BITS * (GROUP_COUNTERS/GROUP_BITS)]; + int freq[MD_BITS]; + + bzero (freq, sizeof(freq[0]) * MD_BITS); + bzero (dup_cache, DUP_CACHE_SIZE * sizeof (u_int32_t)); + bzero (count, (MD_BITS * (GROUP_COUNTERS/GROUP_BITS) * sizeof (u_int32_t))); + + /* Ignore incomplete substrings */ + while (j < len && j < RABIN_WINDOW_SIZE) rabin_slide8 (data[j++]); + + while (j < len) + { u_int64_t hash; + u_int32_t ofs, sum; + u_char idx; + int k; + + hash = rabin_slide8 (data[j++]); + + /* In order to update a much larger frequency table + with only 32 bits of checksum, randomly select a + part of the table to update. The selection should + only depend on the content of the represented data, + and be independent of the bits used for the update. + + Instead of updating 32 individual counters, process + the checksum in MD_BITS / GROUP_BITS groups of + GROUP_BITS bits, and count the frequency of each bit pattern. + */ + + idx = (hash >> 32); + sum = (u_int32_t) hash; + ofs = idx % (MD_BITS / NUM_HASHES_PER_CHAR) * NUM_HASHES_PER_CHAR; + idx %= DUP_CACHE_SIZE; + if (dup_cache[idx] != sum) + { dup_cache[idx] = sum; + for (k = 0; k < NUM_HASHES_PER_CHAR / GROUP_BITS; k++) + { count[ofs * GROUP_COUNTERS / GROUP_BITS + (sum % GROUP_COUNTERS)]++; + ofs += GROUP_BITS; + sum >>= GROUP_BITS; + } } } + + /* Distribute the occurrences of each bit group over the frequency table. */ + for (ofs = 0; ofs < MD_BITS; ofs += GROUP_BITS) + { int j; + for (j = 0; j < GROUP_COUNTERS; j++) + { int k; + for (k = 0; k < GROUP_BITS; k++) + { freq[ofs + k] += ((1<<k) & j) + ? count[ofs * GROUP_COUNTERS / GROUP_BITS + j] + : -count[ofs * GROUP_COUNTERS / GROUP_BITS + j]; + } } } + + if (md) + { rabin_reset(); + freq_to_md (md, freq); +} } diff --git a/gsimm.h b/gsimm.h new file mode 100644 index 0000000000..4b023b91a9 --- /dev/null +++ b/gsimm.h @@ -0,0 +1,28 @@ +#ifndef GSIMM_H +#define GSIMM_H + +/* Length of file message digest (MD) in bytes. Longer MD's are + better, but increase processing time for diminishing returns. + Must be multiple of NUM_HASHES_PER_CHAR / 8, and at least 24 + for good results +*/ +#define MD_LENGTH 32 +#define MD_BITS (MD_LENGTH * 8) + +/* The MIN_FILE_SIZE indicates the absolute minimal file size that + can be processed. As indicated above, the first and last + RABIN_WINDOW_SIZE - 1 bytes are skipped. + In order to get at least an average of 12 samples + per bit in the final message digest, require at least 3 * MD_LENGTH + complete windows in the file. */ +#define MIN_FILE_SIZE (3 * MD_LENGTH + 2 * (RABIN_WINDOW_SIZE - 1)) + +/* Limit matching algorithm to files less than 256 MB, so we can use + 32 bit integers everywhere without fear of overflow. For larger + files we should add logic to mmap the file by piece and accumulate + the frequency counts. */ +#define MAX_FILE_SIZE (256*1024*1024 - 1) + +void gb_simm_process(u_char *data, unsigned len, u_char *md); + +#endif diff --git a/log-tree.c b/log-tree.c index 9634c4677f..aaf2b9423f 100644 --- a/log-tree.c +++ b/log-tree.c @@ -37,12 +37,20 @@ void show_log(struct rev_info *opt, struct log_info *log, const char *sep) /* * Print header line of header.. */ - printf("%s%s", - opt->commit_format == CMIT_FMT_ONELINE ? "" : "commit ", - diff_unique_abbrev(commit->object.sha1, abbrev_commit)); - if (parent) - printf(" (from %s)", diff_unique_abbrev(parent->object.sha1, abbrev_commit)); - putchar(opt->commit_format == CMIT_FMT_ONELINE ? ' ' : '\n'); + + if (opt->commit_format == CMIT_FMT_EMAIL) + printf("From %s Thu Apr 7 15:13:13 2005\n", + sha1_to_hex(commit->object.sha1)); + else { + printf("%s%s", + opt->commit_format == CMIT_FMT_ONELINE ? "" : "commit ", + diff_unique_abbrev(commit->object.sha1, abbrev_commit)); + if (parent) + printf(" (from %s)", + diff_unique_abbrev(parent->object.sha1, + abbrev_commit)); + putchar(opt->commit_format == CMIT_FMT_ONELINE ? ' ' : '\n'); + } /* * And then the pretty-printed message itself @@ -152,15 +160,18 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log int log_tree_commit(struct rev_info *opt, struct commit *commit) { struct log_info log; + int shown; log.commit = commit; log.parent = NULL; opt->loginfo = &log; - if (!log_tree_diff(opt, commit, &log) && opt->loginfo && opt->always_show_header) { + shown = log_tree_diff(opt, commit, &log); + if (!shown && opt->loginfo && opt->always_show_header) { log.parent = NULL; show_log(opt, opt->loginfo, ""); + shown = 1; } opt->loginfo = NULL; - return 0; + return shown; } diff --git a/patch-delta.c b/patch-delta.c index d95f0d9721..8f318ed8aa 100644 --- a/patch-delta.c +++ b/patch-delta.c @@ -13,8 +13,8 @@ #include <string.h> #include "delta.h" -void *patch_delta(void *src_buf, unsigned long src_size, - void *delta_buf, unsigned long delta_size, +void *patch_delta(const void *src_buf, unsigned long src_size, + const void *delta_buf, unsigned long delta_size, unsigned long *dst_size) { const unsigned char *data, *top; diff --git a/rabinpoly.c b/rabinpoly.c new file mode 100644 index 0000000000..c26f3829c3 --- /dev/null +++ b/rabinpoly.c @@ -0,0 +1,154 @@ +/* + * + * Copyright (C) 1999 David Mazieres (dm@uun.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + + /* Faster generic_fls */ + /* (c) 2002, D.Phillips and Sistina Software */ + +#include "rabinpoly.h" +#define MSB64 0x8000000000000000ULL + +static inline unsigned fls8(unsigned n) +{ + return n & 0xf0? + n & 0xc0? (n >> 7) + 7: (n >> 5) + 5: + n & 0x0c? (n >> 3) + 3: n - ((n + 1) >> 2); +} + +static inline unsigned fls16(unsigned n) +{ + return n & 0xff00? fls8(n >> 8) + 8: fls8(n); +} + +static inline unsigned fls32(unsigned n) +{ + return n & 0xffff0000? fls16(n >> 16) + 16: fls16(n); +} + +static inline unsigned fls64(unsigned long long n) /* should be u64 */ +{ + return n & 0xffffffff00000000ULL? fls32(n >> 32) + 32: fls32(n); +} + + +static u_int64_t polymod (u_int64_t nh, u_int64_t nl, u_int64_t d); +static void polymult (u_int64_t *php, u_int64_t *plp, + u_int64_t x, u_int64_t y); +static u_int64_t polymmult (u_int64_t x, u_int64_t y, u_int64_t d); + +static u_int64_t poly = 0xb15e234bd3792f63ull; // Actual polynomial +static u_int64_t T[256]; // Lookup table for mod +static int shift; + +u_int64_t append8 (u_int64_t p, u_char m) +{ return ((p << 8) | m) ^ T[p >> shift]; +} + +static u_int64_t +polymod (u_int64_t nh, u_int64_t nl, u_int64_t d) +{ int i, k; + assert (d); + k = fls64 (d) - 1; + d <<= 63 - k; + + if (nh) { + if (nh & MSB64) + nh ^= d; + for (i = 62; i >= 0; i--) + if (nh & 1ULL << i) { + nh ^= d >> (63 - i); + nl ^= d << (i + 1); + } + } + for (i = 63; i >= k; i--) + if (nl & 1ULL << i) + nl ^= d >> (63 - i); + return nl; +} + +static void +polymult (u_int64_t *php, u_int64_t *plp, u_int64_t x, u_int64_t y) +{ int i; + u_int64_t ph = 0, pl = 0; + if (x & 1) + pl = y; + for (i = 1; i < 64; i++) + if (x & (1ULL << i)) { + ph ^= y >> (64 - i); + pl ^= y << i; + } + if (php) + *php = ph; + if (plp) + *plp = pl; +} + +static u_int64_t +polymmult (u_int64_t x, u_int64_t y, u_int64_t d) +{ + u_int64_t h, l; + polymult (&h, &l, x, y); + return polymod (h, l, d); +} + +static int size = RABIN_WINDOW_SIZE; +static u_int64_t fingerprint = 0; +static int bufpos = -1; +static u_int64_t U[256]; +static u_char buf[RABIN_WINDOW_SIZE]; + +void rabin_init () +{ u_int64_t sizeshift = 1; + u_int64_t T1; + int xshift; + int i, j; + assert (poly >= 0x100); + xshift = fls64 (poly) - 1; + shift = xshift - 8; + T1 = polymod (0, 1ULL << xshift, poly); + for (j = 0; j < 256; j++) + T[j] = polymmult (j, T1, poly) | ((u_int64_t) j << xshift); + + for (i = 1; i < size; i++) + sizeshift = append8 (sizeshift, 0); + for (i = 0; i < 256; i++) + U[i] = polymmult (i, sizeshift, poly); + bzero (buf, sizeof (buf)); +} + +void +rabin_reset () +{ rabin_init(); + fingerprint = 0; + bzero (buf, sizeof (buf)); +} + +u_int64_t +rabin_slide8 (u_char m) +{ u_char om; + if (++bufpos >= size) bufpos = 0; + + om = buf[bufpos]; + buf[bufpos] = m; + fingerprint = append8 (fingerprint ^ U[om], m); + + return fingerprint; +} + diff --git a/rabinpoly.h b/rabinpoly.h new file mode 100644 index 0000000000..a19a097459 --- /dev/null +++ b/rabinpoly.h @@ -0,0 +1,31 @@ +/* + * + * Copyright (C) 2000 David Mazieres (dm@uun.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * Translated to C and simplified by Geert Bosch (bosch@gnat.com) + */ + +#include <assert.h> +#include <strings.h> +#include <sys/types.h> + +#ifndef RABIN_WINDOW_SIZE +#define RABIN_WINDOW_SIZE 48 +#endif +void rabin_reset(); +u_int64_t rabin_slide8(u_char c); diff --git a/read-cache.c b/read-cache.c index f97f92d90a..1f71d12578 100644 --- a/read-cache.c +++ b/read-cache.c @@ -4,11 +4,26 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "cache.h" +#include "cache-tree.h" + +/* Index extensions. + * + * The first letter should be 'A'..'Z' for extensions that are not + * necessary for a correct operation (i.e. optimization data). + * When new extensions are added that _needs_ to be understood in + * order to correctly interpret the index file, pick character that + * is outside the range, to cause the reader to abort. + */ + +#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) ) +#define CACHE_EXT_TREE 0x54524545 /* "TREE" */ struct cache_entry **active_cache = NULL; static time_t index_file_timestamp; unsigned int active_nr = 0, active_alloc = 0, active_cache_changed = 0; +struct cache_tree *active_cache_tree = NULL; + /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -513,6 +528,22 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) return 0; } +static int read_index_extension(const char *ext, void *data, unsigned long sz) +{ + switch (CACHE_EXT(ext)) { + case CACHE_EXT_TREE: + active_cache_tree = cache_tree_read(data, sz); + break; + default: + if (*ext < 'A' || 'Z' < *ext) + return error("index uses %.4s extension, which we do not understand", + ext); + fprintf(stderr, "ignoring %.4s extension\n", ext); + break; + } + return 0; +} + int read_cache(void) { int fd, i; @@ -561,6 +592,22 @@ int read_cache(void) active_cache[i] = ce; } index_file_timestamp = st.st_mtime; + while (offset <= size - 20 - 8) { + /* After an array of active_nr index entries, + * there can be arbitrary number of extended + * sections, each of which is prefixed with + * extension name (4-byte) and section length + * in 4-byte network byte order. + */ + unsigned long extsize; + memcpy(&extsize, map + offset + 4, 4); + extsize = ntohl(extsize); + if (read_index_extension(map + offset, + map + offset + 8, extsize) < 0) + goto unmap; + offset += 8; + offset += extsize; + } return active_nr; unmap: @@ -595,6 +642,17 @@ static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len) return 0; } +static int write_index_ext_header(SHA_CTX *context, int fd, + unsigned long ext, unsigned long sz) +{ + ext = htonl(ext); + sz = htonl(sz); + if ((ce_write(context, fd, &ext, 4) < 0) || + (ce_write(context, fd, &sz, 4) < 0)) + return -1; + return 0; +} + static int ce_flush(SHA_CTX *context, int fd) { unsigned int left = write_buffer_len; @@ -691,5 +749,19 @@ int write_cache(int newfd, struct cache_entry **cache, int entries) if (ce_write(&c, newfd, ce, ce_size(ce)) < 0) return -1; } + + /* Write extension data here */ + if (active_cache_tree) { + unsigned long sz; + void *data = cache_tree_write(active_cache_tree, &sz); + if (data && + !write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sz) && + !ce_write(&c, newfd, data, sz)) + ; + else { + free(data); + return -1; + } + } return ce_flush(&c, newfd); } diff --git a/read-tree.c b/read-tree.c index 26f4f7e323..1c65101291 100644 --- a/read-tree.c +++ b/read-tree.c @@ -9,6 +9,7 @@ #include "object.h" #include "tree.h" +#include "cache-tree.h" #include <sys/time.h> #include <signal.h> @@ -828,6 +829,7 @@ int main(int argc, char **argv) } unpack_trees(fn); + cache_tree_free(&active_cache_tree); if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file"); diff --git a/repo-config.c b/repo-config.c index c5ebb7668a..fa8aba7a1b 100644 --- a/repo-config.c +++ b/repo-config.c @@ -2,7 +2,7 @@ #include <regex.h> static const char git_config_set_usage[] = -"git-repo-config [ --bool | --int ] [--get | --get-all | --replace-all | --unset | --unset-all] name [value [value_regex]]"; +"git-repo-config [ --bool | --int ] [--get | --get-all | --replace-all | --unset | --unset-all] name [value [value_regex]] | --list"; static char* key = NULL; static char* value = NULL; @@ -12,6 +12,15 @@ static int do_not_match = 0; static int seen = 0; static enum { T_RAW, T_INT, T_BOOL } type = T_RAW; +static int show_all_config(const char *key_, const char *value_) +{ + if (value_) + printf("%s=%s\n", key_, value_); + else + printf("%s\n", key_); + return 0; +} + static int show_config(const char* key_, const char* value_) { if (value_ == NULL) @@ -67,7 +76,7 @@ static int get_value(const char* key_, const char* regex_) } } - i = git_config(show_config); + git_config(show_config); if (value) { printf("%s\n", value); free(value); @@ -99,6 +108,9 @@ int main(int argc, const char **argv) argv++; } + if (!strcmp(argv[1], "--list") || !strcmp(argv[1], "-l")) + return git_config(show_all_config); + switch (argc) { case 2: return get_value(argv[1], NULL); diff --git a/test-gsimm.c b/test-gsimm.c new file mode 100644 index 0000000000..bd28b7da28 --- /dev/null +++ b/test-gsimm.c @@ -0,0 +1,209 @@ +#include <unistd.h> +#include <stdlib.h> +#include <fcntl.h> +#include <libgen.h> +#include <stdio.h> +#include <assert.h> +#include <math.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include "rabinpoly.h" +#include "gsimm.h" + +#define MIN(x,y) ((y)<(x) ? (y) : (x)) +#define MAX(x,y) ((y)>(x) ? (y) : (x)) + +/* The RABIN_WINDOW_SIZE is the size of fingerprint window used by + Rabin algorithm. This is not a modifiable parameter. + + The first RABIN_WINDOW_SIZE - 1 bytes are skipped, in order to ensure + fingerprints are good hashes. This does somewhat reduce the + influence of the first few bytes in the file (they're part of + fewer windows, like the last few bytes), but that actually isn't + so bad as files often start with fixed content that may bias comparisons. +*/ + +typedef struct fileinfo +{ char *name; + size_t length; + u_char md[MD_LENGTH]; + int match; +} File; + +int flag_verbose = 0; +int flag_debug = 0; +char *flag_relative = 0; + +char cmd[12] = " ..."; +char md_strbuf[MD_LENGTH * 2 + 1]; +u_char relative_md [MD_LENGTH]; + +File *file; +int file_count; +size_t file_bytes; + +char hex[17] = "0123456789abcdef"; + +void usage() +{ fprintf (stderr, "usage: %s [-dhvw] [-r fingerprint] file ...\n", cmd); + fprintf (stderr, " -d\tdebug output, repeate for more verbosity\n"); + fprintf (stderr, " -h\tshow this usage information\n"); + fprintf (stderr, " -r\tshow distance relative to fingerprint " + "(%u hex digits)\n", MD_LENGTH * 2); + fprintf (stderr, " -v\tverbose output, repeat for even more verbosity\n"); + fprintf (stderr, " -w\tenable warnings for suspect statistics\n"); + exit (1); +} + +int dist (u_char *l, u_char *r) +{ int j, k; + int d = 0; + + for (j = 0; j < MD_LENGTH; j++) + { u_char ch = l[j] ^ r[j]; + + for (k = 0; k < 8; k++) d += ((ch & (1<<k)) > 0); + } + + return d; +} + +char *md_to_str(u_char *md) +{ int j; + + for (j = 0; j < MD_LENGTH; j++) + { u_char ch = md[j]; + + md_strbuf[j*2] = hex[ch >> 4]; + md_strbuf[j*2+1] = hex[ch & 0xF]; + } + + md_strbuf[j*2] = 0; + return md_strbuf; +} + +void process_file (char *name) +{ int fd; + struct stat fs; + u_char *data; + File *fi = file+file_count;; + + fd = open (name, O_RDONLY, 0); + if (fd < 0) + { perror (name); + exit (2); + } + + if (fstat (fd, &fs)) + { perror (name); + exit (2); + } + + if (fs.st_size >= MIN_FILE_SIZE + && fs.st_size <= MAX_FILE_SIZE) + { fi->length = fs.st_size; + fi->name = name; + + data = (u_char *) mmap (0, fs.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (data == (u_char *) -1) + { perror (name); + exit (2); + } + + gb_simm_process (data, fs.st_size, fi->md); + if (flag_relative) + { int d = dist (fi->md, relative_md); + double sim = 1.0 - MIN (1.0, (double) (d) / (MD_LENGTH * 4 - 1)); + fprintf (stdout, "%s %llu %u %s %u %3.1f\n", + md_to_str (fi->md), (long long unsigned) 0, + (unsigned) fs.st_size, name, + d, 100.0 * sim); + } + else + { + fprintf (stdout, "%s %llu %u %s\n", + md_to_str (fi->md), (long long unsigned) 0, + (unsigned) fs.st_size, name); + } + munmap (data, fs.st_size); + file_bytes += fs.st_size; + file_count++; + } else if (flag_verbose) + { fprintf (stdout, "skipping %s (size %llu)\n", name, (long long unsigned) fs.st_size); } + + close (fd); +} + +u_char *str_to_md(char *str, u_char *md) +{ int j; + + if (!md || !str) return 0; + + bzero (md, MD_LENGTH); + + for (j = 0; j < MD_LENGTH * 2; j++) + { char ch = str[j]; + + if (ch >= '0' && ch <= '9') + { md [j/2] = (md [j/2] << 4) + (ch - '0'); + } + else + { ch |= 32; + + if (ch < 'a' || ch > 'f') break; + md [j/2] = (md[j/2] << 4) + (ch - 'a' + 10); + } } + + return (j != MD_LENGTH * 2 || str[j] != 0) ? 0 : md; +} + +int main (int argc, char *argv[]) +{ int ch, j; + + strncpy (cmd, basename (argv[0]), 8); + + while ((ch = getopt(argc, argv, "dhr:vw")) != -1) + { switch (ch) + { case 'd': flag_debug++; + break; + case 'r': if (!optarg) + { fprintf (stderr, "%s: missing argument for -r\n", cmd); + return 1; + } + if (str_to_md (optarg, relative_md)) flag_relative = optarg; + else + { fprintf (stderr, "%s: not a valid fingerprint\n", optarg); + return 1; + } + break; + case 'v': flag_verbose++; + break; + case 'w': break; + default : usage(); + return (ch != 'h'); + } } + + argc -= optind; + argv += optind; + + if (argc == 0) usage(); + + rabin_reset (); + if (flag_verbose && flag_relative) + { fprintf (stdout, "distances are relative to %s\n", flag_relative); + } + + file = (File *) calloc (argc, sizeof (File)); + + for (j = 0; j < argc; j++) process_file (argv[j]); + + if (flag_verbose) + { fprintf (stdout, "%li bytes in %i files\n", (long) file_bytes, file_count); + } + + return 0; +} diff --git a/update-index.c b/update-index.c index facec8d915..157488ff1a 100644 --- a/update-index.c +++ b/update-index.c @@ -7,6 +7,7 @@ #include "strbuf.h" #include "quote.h" #include "tree-walk.h" +#include "cache-tree.h" /* * Default to not allowing changes to the list of files. The @@ -71,6 +72,7 @@ static int mark_valid(const char *path) active_cache[pos]->ce_flags &= ~htons(CE_VALID); break; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; return 0; } @@ -84,6 +86,12 @@ static int add_file_to_cache(const char *path) struct stat st; status = lstat(path, &st); + + /* We probably want to do this in remove_file_from_cache() and + * add_cache_entry() instead... + */ + cache_tree_invalidate_path(active_cache_tree, path); + if (status < 0 || S_ISDIR(st.st_mode)) { /* When we used to have "path" and now we want to add * "path/file", we need a way to remove "path" before @@ -326,6 +334,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, return error("%s: cannot add to the index - missing --add option?", path); report("add '%s'", path); + cache_tree_invalidate_path(active_cache_tree, path); return 0; } @@ -350,6 +359,7 @@ static void chmod_path(int flip, const char *path) default: goto fail; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; report("chmod %cx '%s'", flip, path); return; @@ -371,6 +381,7 @@ static void update_one(const char *path, const char *prefix, int prefix_length) die("Unable to mark file %s", path); return; } + cache_tree_invalidate_path(active_cache_tree, path); if (force_remove) { if (remove_file_from_cache(p)) @@ -446,6 +457,7 @@ static void read_index_info(int line_termination) free(path_name); continue; } + cache_tree_invalidate_path(active_cache_tree, path_name); if (!mode) { /* mode == 0 means there is no such path -- remove */ @@ -550,6 +562,7 @@ static int unresolve_one(const char *path) goto free_return; } + cache_tree_invalidate_path(active_cache_tree, path); remove_file_from_cache(path); if (add_cache_entry(ce_2, ADD_CACHE_OK_TO_ADD)) { error("%s: cannot add our version to the index.", path); diff --git a/write-tree.c b/write-tree.c index dcad6e6670..a5069921a0 100644 --- a/write-tree.c +++ b/write-tree.c @@ -5,95 +5,21 @@ */ #include "cache.h" #include "tree.h" +#include "cache-tree.h" static int missing_ok = 0; -static int check_valid_sha1(unsigned char *sha1) -{ - int ret; - - /* If we were anal, we'd check that the sha1 of the contents actually matches */ - ret = has_sha1_file(sha1); - if (ret == 0) - perror(sha1_file_name(sha1)); - return ret ? 0 : -1; -} - -static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1) -{ - unsigned char subdir_sha1[20]; - unsigned long size, offset; - char *buffer; - int nr; - - /* Guess at some random initial size */ - size = 8192; - buffer = xmalloc(size); - offset = 0; - - nr = 0; - while (nr < maxentries) { - struct cache_entry *ce = cachep[nr]; - const char *pathname = ce->name, *filename, *dirname; - int pathlen = ce_namelen(ce), entrylen; - unsigned char *sha1; - unsigned int mode; - - /* Did we hit the end of the directory? Return how many we wrote */ - if (baselen >= pathlen || memcmp(base, pathname, baselen)) - break; - - sha1 = ce->sha1; - mode = ntohl(ce->ce_mode); - - /* Do we have _further_ subdirectories? */ - filename = pathname + baselen; - dirname = strchr(filename, '/'); - if (dirname) { - int subdir_written; - - subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1); - nr += subdir_written; - - /* Now we need to write out the directory entry into this tree.. */ - mode = S_IFDIR; - pathlen = dirname - pathname; - - /* ..but the directory entry doesn't count towards the total count */ - nr--; - sha1 = subdir_sha1; - } - - if (!missing_ok && check_valid_sha1(sha1) < 0) - exit(1); - - entrylen = pathlen - baselen; - if (offset + entrylen + 100 > size) { - size = alloc_nr(offset + entrylen + 100); - buffer = xrealloc(buffer, size); - } - offset += sprintf(buffer + offset, "%o %.*s", mode, entrylen, filename); - buffer[offset++] = 0; - memcpy(buffer + offset, sha1, 20); - offset += 20; - nr++; - } - - write_sha1_file(buffer, offset, tree_type, returnsha1); - free(buffer); - return nr; -} - static const char write_tree_usage[] = "git-write-tree [--missing-ok]"; +static struct cache_file cache_file; + int main(int argc, char **argv) { - int i, funny; - int entries; - unsigned char sha1[20]; - + int entries, was_valid, newfd; + setup_git_directory(); + newfd = hold_index_file_for_update(&cache_file, get_index_file()); entries = read_cache(); if (argc == 2) { if (!strcmp(argv[1], "--missing-ok")) @@ -108,51 +34,26 @@ int main(int argc, char **argv) if (entries < 0) die("git-write-tree: error reading cache"); - /* Verify that the tree is merged */ - funny = 0; - for (i = 0; i < entries; i++) { - struct cache_entry *ce = active_cache[i]; - if (ce_stage(ce)) { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "%s: unmerged (%s)\n", ce->name, sha1_to_hex(ce->sha1)); + if (!active_cache_tree) + active_cache_tree = cache_tree(); + + was_valid = cache_tree_fully_valid(active_cache_tree); + if (!was_valid) { + if (cache_tree_update(active_cache_tree, + active_cache, active_nr, + missing_ok) < 0) + die("git-write-tree: error building trees"); + if (0 <= newfd) { + if (!write_cache(newfd, active_cache, active_nr)) + commit_index_file(&cache_file); } - } - if (funny) - die("git-write-tree: not able to write tree"); - - /* Also verify that the cache does not have path and path/file - * at the same time. At this point we know the cache has only - * stage 0 entries. - */ - funny = 0; - for (i = 0; i < entries - 1; i++) { - /* path/file always comes after path because of the way - * the cache is sorted. Also path can appear only once, - * which means conflicting one would immediately follow. + /* Not being able to write is fine -- we are only interested + * in updating the cache-tree part, and if the next caller + * ends up using the old index with unupdated cache-tree part + * it misses the work we did here, but that is just a + * performance penalty and not a big deal. */ - const char *this_name = active_cache[i]->name; - const char *next_name = active_cache[i+1]->name; - int this_len = strlen(this_name); - if (this_len < strlen(next_name) && - strncmp(this_name, next_name, this_len) == 0 && - next_name[this_len] == '/') { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "You have both %s and %s\n", - this_name, next_name); - } } - if (funny) - die("git-write-tree: not able to write tree"); - - /* Ok, write it out */ - if (write_tree(active_cache, entries, "", 0, sha1) != entries) - die("git-write-tree: internal error"); - printf("%s\n", sha1_to_hex(sha1)); + printf("%s\n", sha1_to_hex(active_cache_tree->sha1)); return 0; } |