diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 11 | ||||
-rw-r--r-- | apply.c | 5 | ||||
-rw-r--r-- | builtin-log.c | 39 | ||||
-rw-r--r-- | builtin.h | 1 | ||||
-rw-r--r-- | cache-tree.c | 463 | ||||
-rw-r--r-- | cache-tree.h | 30 | ||||
-rw-r--r-- | cache.h | 1 | ||||
-rw-r--r-- | checkout-index.c | 1 | ||||
-rw-r--r-- | commit.c | 37 | ||||
-rw-r--r-- | commit.h | 1 | ||||
-rw-r--r-- | diff-files.c | 212 | ||||
-rw-r--r-- | diff-index.c | 252 | ||||
-rw-r--r-- | diff-lib.c | 1963 | ||||
-rw-r--r-- | diff.c | 1795 | ||||
-rw-r--r-- | diff.h | 9 | ||||
-rw-r--r-- | dump-cache-tree.c | 30 | ||||
-rw-r--r-- | git.c | 1 | ||||
-rw-r--r-- | gsimm.c | 94 | ||||
-rw-r--r-- | gsimm.h | 28 | ||||
-rw-r--r-- | log-tree.c | 27 | ||||
-rw-r--r-- | rabinpoly.c | 154 | ||||
-rw-r--r-- | rabinpoly.h | 31 | ||||
-rw-r--r-- | read-cache.c | 72 | ||||
-rw-r--r-- | read-tree.c | 2 | ||||
-rwxr-xr-x | t/t1001-read-tree-m-2way.sh | 2 | ||||
-rwxr-xr-x | t/t1002-read-tree-m-u-2way.sh | 2 | ||||
-rwxr-xr-x | t/t4010-diff-pathspec.sh | 10 | ||||
-rw-r--r-- | test-gsimm.c | 209 | ||||
-rw-r--r-- | update-index.c | 13 | ||||
-rw-r--r-- | write-tree.c | 147 |
31 files changed, 3370 insertions, 2274 deletions
diff --git a/.gitignore b/.gitignore index b5959d6311..cdb2d53318 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,8 @@ git-write-tree git-core-*/?* test-date test-delta +test-dump-cache-tree +test-gsimm common-cmds.h *.tar.gz *.dsc @@ -199,12 +199,12 @@ LIB_H = \ tree-walk.h log-tree.h DIFF_OBJS = \ - diff-lib.o diffcore-break.o diffcore-order.o \ + diff.o diff-lib.o diffcore-break.o diffcore-order.o \ diffcore-pickaxe.o diffcore-rename.o tree-diff.o combine-diff.o \ diffcore-delta.o log-tree.o LIB_OBJS = \ - blob.o commit.o connect.o csum-file.o \ + blob.o commit.o connect.o csum-file.o cache-tree.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ @@ -611,6 +611,12 @@ test-date$X: test-date.c date.o ctype.o test-delta$X: test-delta.c diff-delta.o patch-delta.o $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz +test-dump-cache-tree$X: dump-cache-tree.o $(GITLIBS) + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + +test-gsimm$X: test-gsimm.c gsimm.o rabinpoly.o + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ + check: for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done @@ -659,6 +665,7 @@ clean: rm -f *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \ $(LIB_FILE) $(XDIFF_LIB) rm -f $(ALL_PROGRAMS) $(BUILT_INS) git$X + rm -f test-date$X test-delta$X test-gsimm$X rm -f *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags rm -rf $(GIT_TARNAME) rm -f $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz @@ -8,6 +8,7 @@ */ #include <fnmatch.h> #include "cache.h" +#include "cache-tree.h" #include "quote.h" #include "blob.h" @@ -1717,6 +1718,7 @@ static void remove_file(struct patch *patch) if (write_index) { if (remove_file_from_cache(patch->old_name) < 0) die("unable to remove %s from index", patch->old_name); + cache_tree_invalidate_path(active_cache_tree, patch->old_name); } unlink(patch->old_name); } @@ -1813,8 +1815,9 @@ static void create_file(struct patch *patch) if (!mode) mode = S_IFREG | 0644; - create_one_file(path, mode, buf, size); + create_one_file(path, mode, buf, size); add_index_file(path, mode, buf, size); + cache_tree_invalidate_path(active_cache_tree, path); } static void write_out_one_result(struct patch *patch) diff --git a/builtin-log.c b/builtin-log.c index 69f2911cb4..a39aed6d86 100644 --- a/builtin-log.c +++ b/builtin-log.c @@ -9,6 +9,7 @@ #include "diff.h" #include "revision.h" #include "log-tree.h" +#include "builtin.h" static int cmd_log_wc(int argc, const char **argv, char **envp, struct rev_info *rev) @@ -67,3 +68,41 @@ int cmd_log(int argc, const char **argv, char **envp) rev.diffopt.recursive = 1; return cmd_log_wc(argc, argv, envp, &rev); } + +int cmd_format_patch(int argc, const char **argv, char **envp) +{ + struct commit *commit; + struct commit **list = NULL; + struct rev_info rev; + int nr = 0; + + init_revisions(&rev); + rev.commit_format = CMIT_FMT_EMAIL; + rev.verbose_header = 1; + rev.diff = 1; + rev.diffopt.with_raw = 0; + rev.diffopt.with_stat = 1; + rev.combine_merges = 0; + rev.ignore_merges = 1; + rev.diffopt.output_format = DIFF_FORMAT_PATCH; + argc = setup_revisions(argc, argv, &rev, "HEAD"); + + prepare_revision_walk(&rev); + while ((commit = get_revision(&rev)) != NULL) { + nr++; + list = realloc(list, nr * sizeof(list[0])); + list[nr - 1] = commit; + } + while (0 <= --nr) { + int shown; + commit = list[nr]; + shown = log_tree_commit(&rev, commit); + free(commit->buffer); + commit->buffer = NULL; + if (shown) + printf("-- \n%s\n\n", git_version_string); + } + free(list); + return 0; +} + @@ -19,5 +19,6 @@ extern int cmd_version(int argc, const char **argv, char **envp); extern int cmd_whatchanged(int argc, const char **argv, char **envp); extern int cmd_show(int argc, const char **argv, char **envp); extern int cmd_log(int argc, const char **argv, char **envp); +extern int cmd_format_patch(int argc, const char **argv, char **envp); #endif diff --git a/cache-tree.c b/cache-tree.c new file mode 100644 index 0000000000..2146723e90 --- /dev/null +++ b/cache-tree.c @@ -0,0 +1,463 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +#define DEBUG 0 + +struct cache_tree *cache_tree(void) +{ + struct cache_tree *it = xcalloc(1, sizeof(struct cache_tree)); + it->entry_count = -1; + return it; +} + +void cache_tree_free(struct cache_tree **it_p) +{ + int i; + struct cache_tree *it = *it_p; + + if (!it) + return; + for (i = 0; i < it->subtree_nr; i++) + cache_tree_free(&it->down[i]->cache_tree); + free(it->down); + free(it); + *it_p = NULL; +} + +static struct cache_tree_sub *find_subtree(struct cache_tree *it, + const char *path, + int pathlen, + int create) +{ + int i; + struct cache_tree_sub *down; + for (i = 0; i < it->subtree_nr; i++) { + down = it->down[i]; + if (down->namelen == pathlen && + !memcmp(down->name, path, pathlen)) + return down; + } + if (!create) + return NULL; + if (it->subtree_alloc <= it->subtree_nr) { + it->subtree_alloc = alloc_nr(it->subtree_alloc); + it->down = xrealloc(it->down, it->subtree_alloc * + sizeof(*it->down)); + } + down = xmalloc(sizeof(*down) + pathlen + 1); + down->cache_tree = NULL; /* cache_tree(); */ + down->namelen = pathlen; + memcpy(down->name, path, pathlen); + down->name[pathlen] = 0; /* not strictly needed */ + it->down[it->subtree_nr++] = down; + return down; +} + +void cache_tree_invalidate_path(struct cache_tree *it, const char *path) +{ + /* a/b/c + * ==> invalidate self + * ==> find "a", have it invalidate "b/c" + * a + * ==> invalidate self + * ==> if "a" exists as a subtree, remove it. + */ + const char *slash; + int namelen; + struct cache_tree_sub *down; + + if (!it) + return; + slash = strchr(path, '/'); + it->entry_count = -1; + if (!slash) { + int i; + namelen = strlen(path); + for (i = 0; i < it->subtree_nr; i++) { + if (it->down[i]->namelen == namelen && + !memcmp(it->down[i]->name, path, namelen)) + break; + } + if (i < it->subtree_nr) { + cache_tree_free(&it->down[i]->cache_tree); + free(it->down[i]); + /* 0 1 2 3 4 5 + * ^ ^subtree_nr = 6 + * i + * move 4 and 5 up one place (2 entries) + * 2 = 6 - 3 - 1 = subtree_nr - i - 1 + */ + memmove(it->down+i, it->down+i+1, + sizeof(struct cache_tree_sub *) * + (it->subtree_nr - i - 1)); + it->subtree_nr--; + } + return; + } + namelen = slash - path; + down = find_subtree(it, path, namelen, 0); + if (down) + cache_tree_invalidate_path(down->cache_tree, slash + 1); +} + +static int verify_cache(struct cache_entry **cache, + int entries) +{ + int i, funny; + + /* Verify that the tree is merged */ + funny = 0; + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + if (ce_stage(ce)) { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "%s: unmerged (%s)\n", + ce->name, sha1_to_hex(ce->sha1)); + } + } + if (funny) + return -1; + + /* Also verify that the cache does not have path and path/file + * at the same time. At this point we know the cache has only + * stage 0 entries. + */ + funny = 0; + for (i = 0; i < entries - 1; i++) { + /* path/file always comes after path because of the way + * the cache is sorted. Also path can appear only once, + * which means conflicting one would immediately follow. + */ + const char *this_name = cache[i]->name; + const char *next_name = cache[i+1]->name; + int this_len = strlen(this_name); + if (this_len < strlen(next_name) && + strncmp(this_name, next_name, this_len) == 0 && + next_name[this_len] == '/') { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "You have both %s and %s\n", + this_name, next_name); + } + } + if (funny) + return -1; + return 0; +} + +static void discard_unused_subtrees(struct cache_tree *it) +{ + struct cache_tree_sub **down = it->down; + int nr = it->subtree_nr; + int dst, src; + for (dst = src = 0; src < nr; src++) { + struct cache_tree_sub *s = down[src]; + if (s->used) + down[dst++] = s; + else { + cache_tree_free(&s->cache_tree); + free(s); + it->subtree_nr--; + } + } +} + +int cache_tree_fully_valid(struct cache_tree *it) +{ + int i; + if (!it) + return 0; + if (it->entry_count < 0 || !has_sha1_file(it->sha1)) + return 0; + for (i = 0; i < it->subtree_nr; i++) { + if (!cache_tree_fully_valid(it->down[i]->cache_tree)) + return 0; + } + return 1; +} + +static int update_one(struct cache_tree *it, + struct cache_entry **cache, + int entries, + const char *base, + int baselen, + int missing_ok) +{ + unsigned long size, offset; + char *buffer; + int i; + + if (0 <= it->entry_count && has_sha1_file(it->sha1)) + return it->entry_count; + + /* + * We first scan for subtrees and update them; we start by + * marking existing subtrees -- the ones that are unmarked + * should not be in the result. + */ + for (i = 0; i < it->subtree_nr; i++) + it->down[i]->used = 0; + + /* + * Find the subtrees and update them. + */ + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, sublen, subcnt; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (!slash) + continue; + /* + * a/bbb/c (base = a/, slash = /c) + * ==> + * path+baselen = bbb/c, sublen = 3 + */ + sublen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, sublen, 1); + if (!sub->cache_tree) + sub->cache_tree = cache_tree(); + subcnt = update_one(sub->cache_tree, + cache + i, entries - i, + path, + baselen + sublen + 1, + missing_ok); + i += subcnt - 1; + sub->used = 1; + } + + discard_unused_subtrees(it); + + /* + * Then write out the tree object for this level. + */ + size = 8192; + buffer = xmalloc(size); + offset = 0; + + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, entlen; + const unsigned char *sha1; + unsigned mode; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (slash) { + entlen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, entlen, 0); + if (!sub) + die("cache-tree.c: '%.*s' in '%s' not found", + entlen, path + baselen, path); + i += sub->cache_tree->entry_count - 1; + sha1 = sub->cache_tree->sha1; + mode = S_IFDIR; + } + else { + sha1 = ce->sha1; + mode = ntohl(ce->ce_mode); + entlen = pathlen - baselen; + } + if (!missing_ok && !has_sha1_file(sha1)) + return error("invalid object %s", sha1_to_hex(sha1)); + + if (!ce->ce_mode) + continue; /* entry being removed */ + + if (size < offset + entlen + 100) { + size = alloc_nr(offset + entlen + 100); + buffer = xrealloc(buffer, size); + } + offset += sprintf(buffer + offset, + "%o %.*s", mode, entlen, path + baselen); + buffer[offset++] = 0; + memcpy(buffer + offset, sha1, 20); + offset += 20; + +#if DEBUG + fprintf(stderr, "cache-tree %o %.*s\n", + mode, entlen, path + baselen); +#endif + } + + write_sha1_file(buffer, offset, tree_type, it->sha1); + free(buffer); + it->entry_count = i; +#if DEBUG + fprintf(stderr, "cache-tree (%d ent, %d subtree) %s\n", + it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); +#endif + return i; +} + +int cache_tree_update(struct cache_tree *it, + struct cache_entry **cache, + int entries, + int missing_ok) +{ + int i; + i = verify_cache(cache, entries); + if (i) + return i; + i = update_one(it, cache, entries, "", 0, missing_ok); + if (i < 0) + return i; + return 0; +} + +static void *write_one(struct cache_tree *it, + char *path, + int pathlen, + char *buffer, + unsigned long *size, + unsigned long *offset) +{ + int i; + + /* One "cache-tree" entry consists of the following: + * path (NUL terminated) + * entry_count, subtree_nr ("%d %d\n") + * tree-sha1 (missing if invalid) + * subtree_nr "cache-tree" entries for subtrees. + */ + if (*size < *offset + pathlen + 100) { + *size = alloc_nr(*offset + pathlen + 100); + buffer = xrealloc(buffer, *size); + } + *offset += sprintf(buffer + *offset, "%.*s%c%d %d\n", + pathlen, path, 0, + it->entry_count, it->subtree_nr); + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n", + pathlen, path, it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n", + pathlen, path, it->subtree_nr); +#endif + + if (0 <= it->entry_count) { + memcpy(buffer + *offset, it->sha1, 20); + *offset += 20; + } + for (i = 0; i < it->subtree_nr; i++) { + struct cache_tree_sub *down = it->down[i]; + buffer = write_one(down->cache_tree, down->name, down->namelen, + buffer, size, offset); + } + return buffer; +} + +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p) +{ + char path[PATH_MAX]; + unsigned long size = 8192; + char *buffer = xmalloc(size); + + *size_p = 0; + path[0] = 0; + return write_one(root, path, 0, buffer, &size, size_p); +} + +static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) +{ + const char *buf = *buffer; + unsigned long size = *size_p; + struct cache_tree *it; + int i, subtree_nr; + + it = NULL; + /* skip name, but make sure name exists */ + while (size && *buf) { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + it = cache_tree(); + if (sscanf(buf, "%d %d\n", &it->entry_count, &subtree_nr) != 2) + goto free_return; + while (size && *buf && *buf != '\n') { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + if (0 <= it->entry_count) { + if (size < 20) + goto free_return; + memcpy(it->sha1, buf, 20); + buf += 20; + size -= 20; + } + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%s> (%d ent, %d subtree) %s\n", + *buffer, it->entry_count, subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%s> (%d subtrees) invalid\n", + *buffer, subtree_nr); +#endif + + /* + * Just a heuristic -- we do not add directories that often but + * we do not want to have to extend it immediately when we do, + * hence +2. + */ + it->subtree_alloc = subtree_nr + 2; + it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *)); + for (i = 0; i < subtree_nr; i++) { + /* read each subtree */ + struct cache_tree *sub; + const char *name = buf; + int namelen; + sub = read_one(&buf, &size); + if (!sub) + goto free_return; + namelen = strlen(name); + it->down[i] = find_subtree(it, name, namelen, 1); + it->down[i]->cache_tree = sub; + } + if (subtree_nr != it->subtree_nr) + die("cache-tree: internal error"); + *buffer = buf; + *size_p = size; + return it; + + free_return: + cache_tree_free(&it); + return NULL; +} + +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size) +{ + if (buffer[0]) + return NULL; /* not the whole tree */ + return read_one(&buffer, &size); +} diff --git a/cache-tree.h b/cache-tree.h new file mode 100644 index 0000000000..c70a7699a9 --- /dev/null +++ b/cache-tree.h @@ -0,0 +1,30 @@ +#ifndef CACHE_TREE_H +#define CACHE_TREE_H + +struct cache_tree; +struct cache_tree_sub { + struct cache_tree *cache_tree; + int namelen; + int used; + char name[FLEX_ARRAY]; +}; + +struct cache_tree { + int entry_count; /* negative means "invalid" */ + unsigned char sha1[20]; + int subtree_nr; + int subtree_alloc; + struct cache_tree_sub **down; +}; + +struct cache_tree *cache_tree(void); +void cache_tree_free(struct cache_tree **); +void cache_tree_invalidate_path(struct cache_tree *, const char *); + +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p); +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); + +int cache_tree_fully_valid(struct cache_tree *); +int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); + +#endif @@ -114,6 +114,7 @@ static inline unsigned int create_ce_mode(unsigned int mode) extern struct cache_entry **active_cache; extern unsigned int active_nr, active_alloc, active_cache_changed; +extern struct cache_tree *active_cache_tree; #define GIT_DIR_ENVIRONMENT "GIT_DIR" #define DEFAULT_GIT_DIR_ENVIRONMENT ".git" diff --git a/checkout-index.c b/checkout-index.c index dd6a2d86fe..e56c354f8c 100644 --- a/checkout-index.c +++ b/checkout-index.c @@ -39,6 +39,7 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" +#include "cache-tree.h" #define CHECKOUT_ALL 4 static const char *prefix; @@ -36,6 +36,8 @@ enum cmit_fmt get_commit_format(const char *arg) return CMIT_FMT_FULL; if (!strcmp(arg, "=fuller")) return CMIT_FMT_FULLER; + if (!strcmp(arg, "=email")) + return CMIT_FMT_EMAIL; if (!strcmp(arg, "=oneline")) return CMIT_FMT_ONELINE; die("invalid --pretty format"); @@ -428,6 +430,10 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, const c time = strtoul(date, &date, 10); tz = strtol(date, NULL, 10); + if (fmt == CMIT_FMT_EMAIL) { + what = "From"; + filler = ""; + } ret = sprintf(buf, "%s: %.*s%.*s\n", what, (fmt == CMIT_FMT_FULLER) ? 4 : 0, filler, namelen, line); @@ -435,6 +441,9 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, const c case CMIT_FMT_MEDIUM: ret += sprintf(buf + ret, "Date: %s\n", show_date(time, tz)); break; + case CMIT_FMT_EMAIL: + ret += sprintf(buf + ret, "Date: %s\n", show_date(time, tz)); + break; case CMIT_FMT_FULLER: ret += sprintf(buf + ret, "%sDate: %s\n", what, show_date(time, tz)); break; @@ -445,10 +454,12 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, const c return ret; } -static int is_empty_line(const char *line, int len) +static int is_empty_line(const char *line, int *len_p) { + int len = *len_p; while (len && isspace(line[len-1])) len--; + *len_p = len; return !len; } @@ -457,7 +468,8 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com struct commit_list *parent = commit->parents; int offset; - if ((fmt == CMIT_FMT_ONELINE) || !parent || !parent->next) + if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) || + !parent || !parent->next) return 0; offset = sprintf(buf, "Merge:"); @@ -480,9 +492,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit { int hdr = 1, body = 0; unsigned long offset = 0; - int indent = (fmt == CMIT_FMT_ONELINE) ? 0 : 4; + int indent = 4; int parents_shown = 0; const char *msg = commit->buffer; + const char *subject = NULL; + + if (fmt == CMIT_FMT_EMAIL) + subject = "Subject: [PATCH] "; + if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL) + indent = 0; for (;;) { const char *line = msg; @@ -506,7 +524,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit if (hdr) { if (linelen == 1) { hdr = 0; - if (fmt != CMIT_FMT_ONELINE) + if ((fmt != CMIT_FMT_ONELINE) && !subject) buf[offset++] = '\n'; continue; } @@ -544,20 +562,29 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit continue; } - if (is_empty_line(line, linelen)) { + if (is_empty_line(line, &linelen)) { if (!body) continue; + if (subject) + continue; if (fmt == CMIT_FMT_SHORT) break; } else { body = 1; } + if (subject) { + int slen = strlen(subject); + memcpy(buf + offset, subject, slen); + offset += slen; + } memset(buf + offset, ' ', indent); memcpy(buf + offset + indent, line, linelen); offset += linelen + indent; + buf[offset++] = '\n'; if (fmt == CMIT_FMT_ONELINE) break; + subject = NULL; } while (offset && isspace(buf[offset-1])) offset--; @@ -45,6 +45,7 @@ enum cmit_fmt { CMIT_FMT_FULL, CMIT_FMT_FULLER, CMIT_FMT_ONELINE, + CMIT_FMT_EMAIL, CMIT_FMT_UNSPECIFIED, }; diff --git a/diff-files.c b/diff-files.c index ffbef48b2e..b9d193d506 100644 --- a/diff-files.c +++ b/diff-files.c @@ -12,203 +12,43 @@ static const char diff_files_usage[] = "git-diff-files [-q] [-0/-1/2/3 |-c|--cc] [<common diff options>] [<path>...]" COMMON_DIFF_OPTIONS_HELP; -static struct rev_info rev; -static int silent = 0; -static int diff_unmerged_stage = 2; -static int combine_merges = 0; -static int dense_combined_merges = 0; - -static void show_unmerge(const char *path) -{ - diff_unmerge(&rev.diffopt, path); -} - -static void show_file(int pfx, struct cache_entry *ce) -{ - diff_addremove(&rev.diffopt, pfx, ntohl(ce->ce_mode), - ce->sha1, ce->name, NULL); -} - -static void show_modified(int oldmode, int mode, - const unsigned char *old_sha1, const unsigned char *sha1, - char *path) -{ - diff_change(&rev.diffopt, oldmode, mode, old_sha1, sha1, path, NULL); -} - int main(int argc, const char **argv) { - const char **pathspec; - const char *prefix = setup_git_directory(); - int entries, i; + struct rev_info rev; + int silent = 0; git_config(git_diff_config); - diff_setup(&rev.diffopt); + init_revisions(&rev); + rev.abbrev = 0; + + argc = setup_revisions(argc, argv, &rev, NULL); while (1 < argc && argv[1][0] == '-') { - if (!strcmp(argv[1], "--")) { - argv++; - argc--; - break; - } - if (!strcmp(argv[1], "-0")) - diff_unmerged_stage = 0; - else if (!strcmp(argv[1], "-1")) - diff_unmerged_stage = 1; - else if (!strcmp(argv[1], "-2")) - diff_unmerged_stage = 2; - else if (!strcmp(argv[1], "-3")) - diff_unmerged_stage = 3; - else if (!strcmp(argv[1], "--base")) - diff_unmerged_stage = 1; + if (!strcmp(argv[1], "--base")) + rev.max_count = 1; else if (!strcmp(argv[1], "--ours")) - diff_unmerged_stage = 2; + rev.max_count = 2; else if (!strcmp(argv[1], "--theirs")) - diff_unmerged_stage = 3; + rev.max_count = 3; else if (!strcmp(argv[1], "-q")) silent = 1; - else if (!strcmp(argv[1], "-r")) - ; /* no-op */ - else if (!strcmp(argv[1], "-s")) - ; /* no-op */ - else if (!strcmp(argv[1], "-c")) - combine_merges = 1; - else if (!strcmp(argv[1], "--cc")) - dense_combined_merges = combine_merges = 1; - else { - int diff_opt_cnt; - diff_opt_cnt = diff_opt_parse(&rev.diffopt, - argv+1, argc-1); - if (diff_opt_cnt < 0) - usage(diff_files_usage); - else if (diff_opt_cnt) { - argv += diff_opt_cnt; - argc -= diff_opt_cnt; - continue; - } - else - usage(diff_files_usage); - } + else + usage(diff_files_usage); argv++; argc--; } - if (dense_combined_merges) - rev.diffopt.output_format = DIFF_FORMAT_PATCH; - - /* Find the directory, and set up the pathspec */ - pathspec = get_pathspec(prefix, argv + 1); - entries = read_cache(); - - if (diff_setup_done(&rev.diffopt) < 0) + /* + * Make sure there are NO revision (i.e. pending object) parameter, + * rev.max_count is reasonable (0 <= n <= 3), + * there is no other revision filtering parameters. + */ + if (rev.pending_objects || + rev.min_age != -1 || rev.max_age != -1) usage(diff_files_usage); - - /* At this point, if argc == 1, then we are doing everything. - * Otherwise argv[1] .. argv[argc-1] have the explicit paths. + /* + * Backward compatibility wart - "diff-files -s" used to + * defeat the common diff option "-s" which asked for + * DIFF_FORMAT_NO_OUTPUT. */ - if (entries < 0) { - perror("read_cache"); - exit(1); - } - - for (i = 0; i < entries; i++) { - struct stat st; - unsigned int oldmode, newmode; - struct cache_entry *ce = active_cache[i]; - int changed; - - if (!ce_path_match(ce, pathspec)) - continue; - - if (ce_stage(ce)) { - struct { - struct combine_diff_path p; - struct combine_diff_parent filler[5]; - } combine; - int num_compare_stages = 0; - - combine.p.next = NULL; - combine.p.len = ce_namelen(ce); - combine.p.path = xmalloc(combine.p.len + 1); - memcpy(combine.p.path, ce->name, combine.p.len); - combine.p.path[combine.p.len] = 0; - combine.p.mode = 0; - memset(combine.p.sha1, 0, 20); - memset(&combine.p.parent[0], 0, - sizeof(combine.filler)); - - while (i < entries) { - struct cache_entry *nce = active_cache[i]; - int stage; - - if (strcmp(ce->name, nce->name)) - break; - - /* Stage #2 (ours) is the first parent, - * stage #3 (theirs) is the second. - */ - stage = ce_stage(nce); - if (2 <= stage) { - int mode = ntohl(nce->ce_mode); - num_compare_stages++; - memcpy(combine.p.parent[stage-2].sha1, - nce->sha1, 20); - combine.p.parent[stage-2].mode = - canon_mode(mode); - combine.p.parent[stage-2].status = - DIFF_STATUS_MODIFIED; - } - - /* diff against the proper unmerged stage */ - if (stage == diff_unmerged_stage) - ce = nce; - i++; - } - /* - * Compensate for loop update - */ - i--; - - if (combine_merges && num_compare_stages == 2) { - show_combined_diff(&combine.p, 2, - dense_combined_merges, - &rev); - free(combine.p.path); - continue; - } - free(combine.p.path); - - /* - * Show the diff for the 'ce' if we found the one - * from the desired stage. - */ - show_unmerge(ce->name); - if (ce_stage(ce) != diff_unmerged_stage) - continue; - } - - if (lstat(ce->name, &st) < 0) { - if (errno != ENOENT && errno != ENOTDIR) { - perror(ce->name); - continue; - } - if (silent) - continue; - show_file('-', ce); - continue; - } - changed = ce_match_stat(ce, &st, 0); - if (!changed && !rev.diffopt.find_copies_harder) - continue; - oldmode = ntohl(ce->ce_mode); - - newmode = canon_mode(st.st_mode); - if (!trust_executable_bit && - S_ISREG(newmode) && S_ISREG(oldmode) && - ((newmode ^ oldmode) == 0111)) - newmode = oldmode; - show_modified(oldmode, newmode, - ce->sha1, (changed ? null_sha1 : ce->sha1), - ce->name); - } - diffcore_std(&rev.diffopt); - diff_flush(&rev.diffopt); - return 0; + if (rev.diffopt.output_format == DIFF_FORMAT_NO_OUTPUT) + rev.diffopt.output_format = DIFF_FORMAT_RAW; + return run_diff_files(&rev, silent); } diff --git a/diff-index.c b/diff-index.c index e376d65f80..86940123b3 100644 --- a/diff-index.c +++ b/diff-index.c @@ -1,166 +1,7 @@ #include "cache.h" -#include "tree.h" #include "diff.h" - -static int cached_only = 0; -static int match_nonexisting = 0; -static struct diff_options diff_options; - -/* A file entry went away or appeared */ -static void show_file(const char *prefix, - struct cache_entry *ce, - unsigned char *sha1, unsigned int mode) -{ - diff_addremove(&diff_options, prefix[0], ntohl(mode), - sha1, ce->name, NULL); -} - -static int get_stat_data(struct cache_entry *ce, - unsigned char ** sha1p, unsigned int *modep) -{ - unsigned char *sha1 = ce->sha1; - unsigned int mode = ce->ce_mode; - - if (!cached_only) { - static unsigned char no_sha1[20]; - int changed; - struct stat st; - if (lstat(ce->name, &st) < 0) { - if (errno == ENOENT && match_nonexisting) { - *sha1p = sha1; - *modep = mode; - return 0; - } - return -1; - } - changed = ce_match_stat(ce, &st, 0); - if (changed) { - mode = create_ce_mode(st.st_mode); - if (!trust_executable_bit && S_ISREG(st.st_mode)) - mode = ce->ce_mode; - sha1 = no_sha1; - } - } - - *sha1p = sha1; - *modep = mode; - return 0; -} - -static void show_new_file(struct cache_entry *new) -{ - unsigned char *sha1; - unsigned int mode; - - /* New file in the index: it might actually be different in - * the working copy. - */ - if (get_stat_data(new, &sha1, &mode) < 0) - return; - - show_file("+", new, sha1, mode); -} - -static int show_modified(struct cache_entry *old, - struct cache_entry *new, - int report_missing) -{ - unsigned int mode, oldmode; - unsigned char *sha1; - - if (get_stat_data(new, &sha1, &mode) < 0) { - if (report_missing) - show_file("-", old, old->sha1, old->ce_mode); - return -1; - } - - oldmode = old->ce_mode; - if (mode == oldmode && !memcmp(sha1, old->sha1, 20) && - !diff_options.find_copies_harder) - return 0; - - mode = ntohl(mode); - oldmode = ntohl(oldmode); - - diff_change(&diff_options, oldmode, mode, - old->sha1, sha1, old->name, NULL); - return 0; -} - -static int diff_cache(struct cache_entry **ac, int entries, const char **pathspec) -{ - while (entries) { - struct cache_entry *ce = *ac; - int same = (entries > 1) && ce_same_name(ce, ac[1]); - - if (!ce_path_match(ce, pathspec)) - goto skip_entry; - - switch (ce_stage(ce)) { - case 0: - /* No stage 1 entry? That means it's a new file */ - if (!same) { - show_new_file(ce); - break; - } - /* Show difference between old and new */ - show_modified(ac[1], ce, 1); - break; - case 1: - /* No stage 3 (merge) entry? That means it's been deleted */ - if (!same) { - show_file("-", ce, ce->sha1, ce->ce_mode); - break; - } - /* We come here with ce pointing at stage 1 - * (original tree) and ac[1] pointing at stage - * 3 (unmerged). show-modified with - * report-missing set to false does not say the - * file is deleted but reports true if work - * tree does not have it, in which case we - * fall through to report the unmerged state. - * Otherwise, we show the differences between - * the original tree and the work tree. - */ - if (!cached_only && !show_modified(ce, ac[1], 0)) - break; - /* fallthru */ - case 3: - diff_unmerge(&diff_options, ce->name); - break; - - default: - die("impossible cache entry stage"); - } - -skip_entry: - /* - * Ignore all the different stages for this file, - * we've handled the relevant cases now. - */ - do { - ac++; - entries--; - } while (entries && ce_same_name(ce, ac[0])); - } - return 0; -} - -/* - * This turns all merge entries into "stage 3". That guarantees that - * when we read in the new tree (into "stage 1"), we won't lose sight - * of the fact that we had unmerged entries. - */ -static void mark_merge_entries(void) -{ - int i; - for (i = 0; i < active_nr; i++) { - struct cache_entry *ce = active_cache[i]; - if (!ce_stage(ce)) - continue; - ce->ce_flags |= htons(CE_STAGEMASK); - } -} +#include "commit.h" +#include "revision.h" static const char diff_cache_usage[] = "git-diff-index [-m] [--cached] " @@ -169,85 +10,30 @@ COMMON_DIFF_OPTIONS_HELP; int main(int argc, const char **argv) { - const char *tree_name = NULL; - unsigned char sha1[20]; - const char *prefix = setup_git_directory(); - const char **pathspec = NULL; - struct tree *tree; - int ret; - int allow_options = 1; + struct rev_info rev; + int match_missing = 0; + int cached = 0; int i; git_config(git_diff_config); - diff_setup(&diff_options); + init_revisions(&rev); + rev.abbrev = 0; + + argc = setup_revisions(argc, argv, &rev, NULL); for (i = 1; i < argc; i++) { const char *arg = argv[i]; - int diff_opt_cnt; - - if (!allow_options || *arg != '-') { - if (tree_name) - break; - tree_name = arg; - continue; - } - if (!strcmp(arg, "--")) { - allow_options = 0; - continue; - } - if (!strcmp(arg, "-r")) { - /* We accept the -r flag just to look like git-diff-tree */ - continue; - } - if (!strcmp(arg, "--cc")) - /* - * I _think_ "diff-index --cached HEAD" with an - * unmerged index could show something else - * later, but pretend --cc is the same as -p for - * now. "git diff" uses --cc by default. - */ - argv[i] = arg = "-p"; - diff_opt_cnt = diff_opt_parse(&diff_options, argv + i, - argc - i); - if (diff_opt_cnt < 0) + if (!strcmp(arg, "--cached")) + cached = 1; + else usage(diff_cache_usage); - else if (diff_opt_cnt) { - i += diff_opt_cnt - 1; - continue; - } - - if (!strcmp(arg, "-m")) { - match_nonexisting = 1; - continue; - } - if (!strcmp(arg, "--cached")) { - cached_only = 1; - continue; - } - usage(diff_cache_usage); } - - pathspec = get_pathspec(prefix, argv + i); - - if (diff_setup_done(&diff_options) < 0) - usage(diff_cache_usage); - - if (!tree_name || get_sha1(tree_name, sha1)) + /* + * Make sure there is one revision (i.e. pending object), + * and there is no revision filtering parameters. + */ + if (!rev.pending_objects || rev.pending_objects->next || + rev.max_count != -1 || rev.min_age != -1 || rev.max_age != -1) usage(diff_cache_usage); - - read_cache(); - - mark_merge_entries(); - - tree = parse_tree_indirect(sha1); - if (!tree) - die("bad tree object %s", tree_name); - if (read_tree(tree, 1, pathspec)) - die("unable to read tree object %s", tree_name); - - ret = diff_cache(active_cache, active_nr, pathspec); - - diffcore_std(&diff_options); - diff_flush(&diff_options); - return ret; + return run_diff_index(&rev, cached); } diff --git a/diff-lib.c b/diff-lib.c index 13b216f273..2183b41b03 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -1,1795 +1,344 @@ /* * Copyright (C) 2005 Junio C Hamano */ -#include <sys/types.h> -#include <sys/wait.h> -#include <signal.h> #include "cache.h" #include "quote.h" +#include "commit.h" #include "diff.h" #include "diffcore.h" -#include "xdiff-interface.h" +#include "revision.h" -static int use_size_cache; - -int diff_rename_limit_default = -1; - -int git_diff_config(const char *var, const char *value) -{ - if (!strcmp(var, "diff.renamelimit")) { - diff_rename_limit_default = git_config_int(var, value); - return 0; - } - - return git_default_config(var, value); -} - -static char *quote_one(const char *str) -{ - int needlen; - char *xp; - - if (!str) - return NULL; - needlen = quote_c_style(str, NULL, NULL, 0); - if (!needlen) - return strdup(str); - xp = xmalloc(needlen + 1); - quote_c_style(str, xp, NULL, 0); - return xp; -} - -static char *quote_two(const char *one, const char *two) -{ - int need_one = quote_c_style(one, NULL, NULL, 1); - int need_two = quote_c_style(two, NULL, NULL, 1); - char *xp; - - if (need_one + need_two) { - if (!need_one) need_one = strlen(one); - if (!need_two) need_one = strlen(two); - - xp = xmalloc(need_one + need_two + 3); - xp[0] = '"'; - quote_c_style(one, xp + 1, NULL, 1); - quote_c_style(two, xp + need_one + 1, NULL, 1); - strcpy(xp + need_one + need_two + 1, "\""); - return xp; - } - need_one = strlen(one); - need_two = strlen(two); - xp = xmalloc(need_one + need_two + 1); - strcpy(xp, one); - strcpy(xp + need_one, two); - return xp; -} - -static const char *external_diff(void) -{ - static const char *external_diff_cmd = NULL; - static int done_preparing = 0; - - if (done_preparing) - return external_diff_cmd; - external_diff_cmd = getenv("GIT_EXTERNAL_DIFF"); - done_preparing = 1; - return external_diff_cmd; -} - -#define TEMPFILE_PATH_LEN 50 - -static struct diff_tempfile { - const char *name; /* filename external diff should read from */ - char hex[41]; - char mode[10]; - char tmp_path[TEMPFILE_PATH_LEN]; -} diff_temp[2]; - -static int count_lines(const char *data, int size) -{ - int count, ch, completely_empty = 1, nl_just_seen = 0; - count = 0; - while (0 < size--) { - ch = *data++; - if (ch == '\n') { - count++; - nl_just_seen = 1; - completely_empty = 0; - } - else { - nl_just_seen = 0; - completely_empty = 0; - } - } - if (completely_empty) - return 0; - if (!nl_just_seen) - count++; /* no trailing newline */ - return count; -} - -static void print_line_count(int count) -{ - switch (count) { - case 0: - printf("0,0"); - break; - case 1: - printf("1"); - break; - default: - printf("1,%d", count); - break; - } -} - -static void copy_file(int prefix, const char *data, int size) -{ - int ch, nl_just_seen = 1; - while (0 < size--) { - ch = *data++; - if (nl_just_seen) - putchar(prefix); - putchar(ch); - if (ch == '\n') - nl_just_seen = 1; - else - nl_just_seen = 0; - } - if (!nl_just_seen) - printf("\n\\ No newline at end of file\n"); -} +/* + * diff-files + */ -static void emit_rewrite_diff(const char *name_a, - const char *name_b, - struct diff_filespec *one, - struct diff_filespec *two) +int run_diff_files(struct rev_info *revs, int silent_on_removed) { - int lc_a, lc_b; - diff_populate_filespec(one, 0); - diff_populate_filespec(two, 0); - lc_a = count_lines(one->data, one->size); - lc_b = count_lines(two->data, two->size); - printf("--- %s\n+++ %s\n@@ -", name_a, name_b); - print_line_count(lc_a); - printf(" +"); - print_line_count(lc_b); - printf(" @@\n"); - if (lc_a) - copy_file('-', one->data, one->size); - if (lc_b) - copy_file('+', two->data, two->size); -} + int entries, i; + int diff_unmerged_stage = revs->max_count; -static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) -{ - if (!DIFF_FILE_VALID(one)) { - mf->ptr = ""; /* does not matter */ - mf->size = 0; - return 0; - } - else if (diff_populate_filespec(one, 0)) + if (diff_unmerged_stage < 0) + diff_unmerged_stage = 2; + entries = read_cache(); + if (entries < 0) { + perror("read_cache"); return -1; - mf->ptr = one->data; - mf->size = one->size; - return 0; -} - -struct emit_callback { - const char **label_path; -}; - -static int fn_out(void *priv, mmbuffer_t *mb, int nbuf) -{ - int i; - struct emit_callback *ecbdata = priv; - - if (ecbdata->label_path[0]) { - printf("--- %s\n", ecbdata->label_path[0]); - printf("+++ %s\n", ecbdata->label_path[1]); - ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } - for (i = 0; i < nbuf; i++) - if (!fwrite(mb[i].ptr, mb[i].size, 1, stdout)) - return -1; - return 0; -} - -static char *pprint_rename(const char *a, const char *b) -{ - const char *old = a; - const char *new = b; - char *name = NULL; - int pfx_length, sfx_length; - int len_a = strlen(a); - int len_b = strlen(b); - - /* Find common prefix */ - pfx_length = 0; - while (*old && *new && *old == *new) { - if (*old == '/') - pfx_length = old - a + 1; - old++; - new++; - } - - /* Find common suffix */ - old = a + len_a; - new = b + len_b; - sfx_length = 0; - while (a <= old && b <= new && *old == *new) { - if (*old == '/') - sfx_length = len_a - (old - a); - old--; - new--; - } - - /* - * pfx{mid-a => mid-b}sfx - * {pfx-a => pfx-b}sfx - * pfx{sfx-a => sfx-b} - * name-a => name-b - */ - if (pfx_length + sfx_length) { - name = xmalloc(len_a + len_b - pfx_length - sfx_length + 7); - sprintf(name, "%.*s{%.*s => %.*s}%s", - pfx_length, a, - len_a - pfx_length - sfx_length, a + pfx_length, - len_b - pfx_length - sfx_length, b + pfx_length, - a + len_a - sfx_length); - } - else { - name = xmalloc(len_a + len_b + 5); - sprintf(name, "%s => %s", a, b); - } - return name; -} - -struct diffstat_t { - struct xdiff_emit_state xm; - - int nr; - int alloc; - struct diffstat_file { - char *name; - unsigned is_unmerged:1; - unsigned is_binary:1; - unsigned is_renamed:1; - unsigned int added, deleted; - } **files; -}; - -static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat, - const char *name_a, - const char *name_b) -{ - struct diffstat_file *x; - x = xcalloc(sizeof (*x), 1); - if (diffstat->nr == diffstat->alloc) { - diffstat->alloc = alloc_nr(diffstat->alloc); - diffstat->files = xrealloc(diffstat->files, - diffstat->alloc * sizeof(x)); - } - diffstat->files[diffstat->nr++] = x; - if (name_b) { - x->name = pprint_rename(name_a, name_b); - x->is_renamed = 1; - } - else - x->name = strdup(name_a); - return x; -} - -static void diffstat_consume(void *priv, char *line, unsigned long len) -{ - struct diffstat_t *diffstat = priv; - struct diffstat_file *x = diffstat->files[diffstat->nr - 1]; - - if (line[0] == '+') - x->added++; - else if (line[0] == '-') - x->deleted++; -} - -static const char pluses[] = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"; -static const char minuses[]= "----------------------------------------------------------------------"; - -static void show_stats(struct diffstat_t* data) -{ - char *prefix = ""; - int i, len, add, del, total, adds = 0, dels = 0; - int max, max_change = 0, max_len = 0; - int total_files = data->nr; - - if (data->nr == 0) - return; - - for (i = 0; i < data->nr; i++) { - struct diffstat_file *file = data->files[i]; - - len = strlen(file->name); - if (max_len < len) - max_len = len; + for (i = 0; i < entries; i++) { + struct stat st; + unsigned int oldmode, newmode; + struct cache_entry *ce = active_cache[i]; + int changed; - if (file->is_binary || file->is_unmerged) + if (!ce_path_match(ce, revs->prune_data)) continue; - if (max_change < file->added + file->deleted) - max_change = file->added + file->deleted; - } - for (i = 0; i < data->nr; i++) { - char *name = data->files[i]->name; - int added = data->files[i]->added; - int deleted = data->files[i]->deleted; + if (ce_stage(ce)) { + struct { + struct combine_diff_path p; + struct combine_diff_parent filler[5]; + } combine; + int num_compare_stages = 0; + + combine.p.next = NULL; + combine.p.len = ce_namelen(ce); + combine.p.path = xmalloc(combine.p.len + 1); + memcpy(combine.p.path, ce->name, combine.p.len); + combine.p.path[combine.p.len] = 0; + combine.p.mode = 0; + memset(combine.p.sha1, 0, 20); + memset(&combine.p.parent[0], 0, + sizeof(combine.filler)); + + while (i < entries) { + struct cache_entry *nce = active_cache[i]; + int stage; + + if (strcmp(ce->name, nce->name)) + break; + + /* Stage #2 (ours) is the first parent, + * stage #3 (theirs) is the second. + */ + stage = ce_stage(nce); + if (2 <= stage) { + int mode = ntohl(nce->ce_mode); + num_compare_stages++; + memcpy(combine.p.parent[stage-2].sha1, + nce->sha1, 20); + combine.p.parent[stage-2].mode = + canon_mode(mode); + combine.p.parent[stage-2].status = + DIFF_STATUS_MODIFIED; + } + + /* diff against the proper unmerged stage */ + if (stage == diff_unmerged_stage) + ce = nce; + i++; + } + /* + * Compensate for loop update + */ + i--; - if (0 < (len = quote_c_style(name, NULL, NULL, 0))) { - char *qname = xmalloc(len + 1); - quote_c_style(name, qname, NULL, 0); - free(name); - data->files[i]->name = name = qname; - } + if (revs->combine_merges && num_compare_stages == 2) { + show_combined_diff(&combine.p, 2, + revs->dense_combined_merges, + revs); + free(combine.p.path); + continue; + } + free(combine.p.path); - /* - * "scale" the filename - */ - len = strlen(name); - max = max_len; - if (max > 50) - max = 50; - if (len > max) { - char *slash; - prefix = "..."; - max -= 3; - name += len - max; - slash = strchr(name, '/'); - if (slash) - name = slash; + /* + * Show the diff for the 'ce' if we found the one + * from the desired stage. + */ + diff_unmerge(&revs->diffopt, ce->name); + if (ce_stage(ce) != diff_unmerged_stage) + continue; } - len = max; - /* - * scale the add/delete - */ - max = max_change; - if (max + len > 70) - max = 70 - len; - - if (data->files[i]->is_binary) { - printf(" %s%-*s | Bin\n", prefix, len, name); - goto free_diffstat_file; - } - else if (data->files[i]->is_unmerged) { - printf(" %s%-*s | Unmerged\n", prefix, len, name); - goto free_diffstat_file; - } - else if (!data->files[i]->is_renamed && - (added + deleted == 0)) { - total_files--; - goto free_diffstat_file; + if (lstat(ce->name, &st) < 0) { + if (errno != ENOENT && errno != ENOTDIR) { + perror(ce->name); + continue; + } + if (silent_on_removed) + continue; + diff_addremove(&revs->diffopt, '-', ntohl(ce->ce_mode), + ce->sha1, ce->name, NULL); + continue; } + changed = ce_match_stat(ce, &st, 0); + if (!changed && !revs->diffopt.find_copies_harder) + continue; + oldmode = ntohl(ce->ce_mode); - add = added; - del = deleted; - total = add + del; - adds += add; - dels += del; + newmode = canon_mode(st.st_mode); + if (!trust_executable_bit && + S_ISREG(newmode) && S_ISREG(oldmode) && + ((newmode ^ oldmode) == 0111)) + newmode = oldmode; + diff_change(&revs->diffopt, oldmode, newmode, + ce->sha1, (changed ? null_sha1 : ce->sha1), + ce->name, NULL); - if (max_change > 0) { - total = (total * max + max_change / 2) / max_change; - add = (add * max + max_change / 2) / max_change; - del = total - add; - } - printf(" %s%-*s |%5d %.*s%.*s\n", prefix, - len, name, added + deleted, - add, pluses, del, minuses); - free_diffstat_file: - free(data->files[i]->name); - free(data->files[i]); } - free(data->files); - printf(" %d files changed, %d insertions(+), %d deletions(-)\n", - total_files, adds, dels); -} - -#define FIRST_FEW_BYTES 8000 -static int mmfile_is_binary(mmfile_t *mf) -{ - long sz = mf->size; - if (FIRST_FEW_BYTES < sz) - sz = FIRST_FEW_BYTES; - if (memchr(mf->ptr, 0, sz)) - return 1; + diffcore_std(&revs->diffopt); + diff_flush(&revs->diffopt); return 0; } -static void builtin_diff(const char *name_a, - const char *name_b, - struct diff_filespec *one, - struct diff_filespec *two, - const char *xfrm_msg, - int complete_rewrite) -{ - mmfile_t mf1, mf2; - const char *lbl[2]; - char *a_one, *b_two; - - a_one = quote_two("a/", name_a); - b_two = quote_two("b/", name_b); - lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null"; - lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null"; - printf("diff --git %s %s\n", a_one, b_two); - if (lbl[0][0] == '/') { - /* /dev/null */ - printf("new file mode %06o\n", two->mode); - if (xfrm_msg && xfrm_msg[0]) - puts(xfrm_msg); - } - else if (lbl[1][0] == '/') { - printf("deleted file mode %06o\n", one->mode); - if (xfrm_msg && xfrm_msg[0]) - puts(xfrm_msg); - } - else { - if (one->mode != two->mode) { - printf("old mode %06o\n", one->mode); - printf("new mode %06o\n", two->mode); - } - if (xfrm_msg && xfrm_msg[0]) - puts(xfrm_msg); - /* - * we do not run diff between different kind - * of objects. - */ - if ((one->mode ^ two->mode) & S_IFMT) - goto free_ab_and_return; - if (complete_rewrite) { - emit_rewrite_diff(name_a, name_b, one, two); - goto free_ab_and_return; - } - } - - if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) - die("unable to read files to diff"); - - if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) - printf("Binary files %s and %s differ\n", lbl[0], lbl[1]); - else { - /* Crazy xdl interfaces.. */ - const char *diffopts = getenv("GIT_DIFF_OPTS"); - xpparam_t xpp; - xdemitconf_t xecfg; - xdemitcb_t ecb; - struct emit_callback ecbdata; - - ecbdata.label_path = lbl; - xpp.flags = XDF_NEED_MINIMAL; - xecfg.ctxlen = 3; - xecfg.flags = XDL_EMIT_FUNCNAMES; - if (!diffopts) - ; - else if (!strncmp(diffopts, "--unified=", 10)) - xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10); - else if (!strncmp(diffopts, "-u", 2)) - xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10); - ecb.outf = fn_out; - ecb.priv = &ecbdata; - xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); - } - - free_ab_and_return: - free(a_one); - free(b_two); - return; -} - -static void builtin_diffstat(const char *name_a, const char *name_b, - struct diff_filespec *one, - struct diff_filespec *two, - struct diffstat_t *diffstat) -{ - mmfile_t mf1, mf2; - struct diffstat_file *data; - - data = diffstat_add(diffstat, name_a, name_b); - - if (!one || !two) { - data->is_unmerged = 1; - return; - } - - if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) - die("unable to read files to diff"); - - if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) - data->is_binary = 1; - else { - /* Crazy xdl interfaces.. */ - xpparam_t xpp; - xdemitconf_t xecfg; - xdemitcb_t ecb; - - xpp.flags = XDF_NEED_MINIMAL; - xecfg.ctxlen = 0; - xecfg.flags = 0; - ecb.outf = xdiff_outf; - ecb.priv = diffstat; - xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); - } -} - -struct diff_filespec *alloc_filespec(const char *path) -{ - int namelen = strlen(path); - struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1); - - memset(spec, 0, sizeof(*spec)); - spec->path = (char *)(spec + 1); - memcpy(spec->path, path, namelen+1); - return spec; -} - -void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1, - unsigned short mode) -{ - if (mode) { - spec->mode = canon_mode(mode); - memcpy(spec->sha1, sha1, 20); - spec->sha1_valid = !!memcmp(sha1, null_sha1, 20); - } -} - /* - * Given a name and sha1 pair, if the dircache tells us the file in - * the work tree has that object contents, return true, so that - * prepare_temp_file() does not have to inflate and extract. + * diff-index */ -static int work_tree_matches(const char *name, const unsigned char *sha1) -{ - struct cache_entry *ce; - struct stat st; - int pos, len; - - /* We do not read the cache ourselves here, because the - * benchmark with my previous version that always reads cache - * shows that it makes things worse for diff-tree comparing - * two linux-2.6 kernel trees in an already checked out work - * tree. This is because most diff-tree comparisons deal with - * only a small number of files, while reading the cache is - * expensive for a large project, and its cost outweighs the - * savings we get by not inflating the object to a temporary - * file. Practically, this code only helps when we are used - * by diff-cache --cached, which does read the cache before - * calling us. - */ - if (!active_cache) - return 0; - len = strlen(name); - pos = cache_name_pos(name, len); - if (pos < 0) - return 0; - ce = active_cache[pos]; - if ((lstat(name, &st) < 0) || - !S_ISREG(st.st_mode) || /* careful! */ - ce_match_stat(ce, &st, 0) || - memcmp(sha1, ce->sha1, 20)) - return 0; - /* we return 1 only when we can stat, it is a regular file, - * stat information matches, and sha1 recorded in the cache - * matches. I.e. we know the file in the work tree really is - * the same as the <name, sha1> pair. - */ - return 1; -} - -static struct sha1_size_cache { - unsigned char sha1[20]; - unsigned long size; -} **sha1_size_cache; -static int sha1_size_cache_nr, sha1_size_cache_alloc; - -static struct sha1_size_cache *locate_size_cache(unsigned char *sha1, - int find_only, - unsigned long size) +/* A file entry went away or appeared */ +static void diff_index_show_file(struct rev_info *revs, + const char *prefix, + struct cache_entry *ce, + unsigned char *sha1, unsigned int mode) { - int first, last; - struct sha1_size_cache *e; - - first = 0; - last = sha1_size_cache_nr; - while (last > first) { - int cmp, next = (last + first) >> 1; - e = sha1_size_cache[next]; - cmp = memcmp(e->sha1, sha1, 20); - if (!cmp) - return e; - if (cmp < 0) { - last = next; - continue; - } - first = next+1; - } - /* not found */ - if (find_only) - return NULL; - /* insert to make it at "first" */ - if (sha1_size_cache_alloc <= sha1_size_cache_nr) { - sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc); - sha1_size_cache = xrealloc(sha1_size_cache, - sha1_size_cache_alloc * - sizeof(*sha1_size_cache)); - } - sha1_size_cache_nr++; - if (first < sha1_size_cache_nr) - memmove(sha1_size_cache + first + 1, sha1_size_cache + first, - (sha1_size_cache_nr - first - 1) * - sizeof(*sha1_size_cache)); - e = xmalloc(sizeof(struct sha1_size_cache)); - sha1_size_cache[first] = e; - memcpy(e->sha1, sha1, 20); - e->size = size; - return e; + diff_addremove(&revs->diffopt, prefix[0], ntohl(mode), + sha1, ce->name, NULL); } -/* - * While doing rename detection and pickaxe operation, we may need to - * grab the data for the blob (or file) for our own in-core comparison. - * diff_filespec has data and size fields for this purpose. - */ -int diff_populate_filespec(struct diff_filespec *s, int size_only) +static int get_stat_data(struct cache_entry *ce, + unsigned char **sha1p, + unsigned int *modep, + int cached, int match_missing) { - int err = 0; - if (!DIFF_FILE_VALID(s)) - die("internal error: asking to populate invalid file."); - if (S_ISDIR(s->mode)) - return -1; - - if (!use_size_cache) - size_only = 0; + unsigned char *sha1 = ce->sha1; + unsigned int mode = ce->ce_mode; - if (s->data) - return err; - if (!s->sha1_valid || - work_tree_matches(s->path, s->sha1)) { + if (!cached) { + static unsigned char no_sha1[20]; + int changed; struct stat st; - int fd; - if (lstat(s->path, &st) < 0) { - if (errno == ENOENT) { - err_empty: - err = -1; - empty: - s->data = ""; - s->size = 0; - return err; - } - } - s->size = st.st_size; - if (!s->size) - goto empty; - if (size_only) - return 0; - if (S_ISLNK(st.st_mode)) { - int ret; - s->data = xmalloc(s->size); - s->should_free = 1; - ret = readlink(s->path, s->data, s->size); - if (ret < 0) { - free(s->data); - goto err_empty; - } - return 0; - } - fd = open(s->path, O_RDONLY); - if (fd < 0) - goto err_empty; - s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - if (s->data == MAP_FAILED) - goto err_empty; - s->should_munmap = 1; - } - else { - char type[20]; - struct sha1_size_cache *e; - - if (size_only) { - e = locate_size_cache(s->sha1, 1, 0); - if (e) { - s->size = e->size; + if (lstat(ce->name, &st) < 0) { + if (errno == ENOENT && match_missing) { + *sha1p = sha1; + *modep = mode; return 0; } - if (!sha1_object_info(s->sha1, type, &s->size)) - locate_size_cache(s->sha1, 0, s->size); - } - else { - s->data = read_sha1_file(s->sha1, type, &s->size); - s->should_free = 1; - } - } - return 0; -} - -void diff_free_filespec_data(struct diff_filespec *s) -{ - if (s->should_free) - free(s->data); - else if (s->should_munmap) - munmap(s->data, s->size); - s->should_free = s->should_munmap = 0; - s->data = NULL; - free(s->cnt_data); - s->cnt_data = NULL; -} - -static void prep_temp_blob(struct diff_tempfile *temp, - void *blob, - unsigned long size, - const unsigned char *sha1, - int mode) -{ - int fd; - - fd = git_mkstemp(temp->tmp_path, TEMPFILE_PATH_LEN, ".diff_XXXXXX"); - if (fd < 0) - die("unable to create temp-file"); - if (write(fd, blob, size) != size) - die("unable to write temp-file"); - close(fd); - temp->name = temp->tmp_path; - strcpy(temp->hex, sha1_to_hex(sha1)); - temp->hex[40] = 0; - sprintf(temp->mode, "%06o", mode); -} - -static void prepare_temp_file(const char *name, - struct diff_tempfile *temp, - struct diff_filespec *one) -{ - if (!DIFF_FILE_VALID(one)) { - not_a_valid_file: - /* A '-' entry produces this for file-2, and - * a '+' entry produces this for file-1. - */ - temp->name = "/dev/null"; - strcpy(temp->hex, "."); - strcpy(temp->mode, "."); - return; - } - - if (!one->sha1_valid || - work_tree_matches(name, one->sha1)) { - struct stat st; - if (lstat(name, &st) < 0) { - if (errno == ENOENT) - goto not_a_valid_file; - die("stat(%s): %s", name, strerror(errno)); - } - if (S_ISLNK(st.st_mode)) { - int ret; - char buf[PATH_MAX + 1]; /* ought to be SYMLINK_MAX */ - if (sizeof(buf) <= st.st_size) - die("symlink too long: %s", name); - ret = readlink(name, buf, st.st_size); - if (ret < 0) - die("readlink(%s)", name); - prep_temp_blob(temp, buf, st.st_size, - (one->sha1_valid ? - one->sha1 : null_sha1), - (one->sha1_valid ? - one->mode : S_IFLNK)); + return -1; } - else { - /* we can borrow from the file in the work tree */ - temp->name = name; - if (!one->sha1_valid) - strcpy(temp->hex, sha1_to_hex(null_sha1)); - else - strcpy(temp->hex, sha1_to_hex(one->sha1)); - /* Even though we may sometimes borrow the - * contents from the work tree, we always want - * one->mode. mode is trustworthy even when - * !(one->sha1_valid), as long as - * DIFF_FILE_VALID(one). - */ - sprintf(temp->mode, "%06o", one->mode); + changed = ce_match_stat(ce, &st, 0); + if (changed) { + mode = create_ce_mode(st.st_mode); + if (!trust_executable_bit && S_ISREG(st.st_mode)) + mode = ce->ce_mode; + sha1 = no_sha1; } - return; } - else { - if (diff_populate_filespec(one, 0)) - die("cannot read data blob for %s", one->path); - prep_temp_blob(temp, one->data, one->size, - one->sha1, one->mode); - } -} -static void remove_tempfile(void) -{ - int i; - - for (i = 0; i < 2; i++) - if (diff_temp[i].name == diff_temp[i].tmp_path) { - unlink(diff_temp[i].name); - diff_temp[i].name = NULL; - } -} - -static void remove_tempfile_on_signal(int signo) -{ - remove_tempfile(); - signal(SIGINT, SIG_DFL); - raise(signo); + *sha1p = sha1; + *modep = mode; + return 0; } -static int spawn_prog(const char *pgm, const char **arg) +static void show_new_file(struct rev_info *revs, + struct cache_entry *new, + int cached, int match_missing) { - pid_t pid; - int status; - - fflush(NULL); - pid = fork(); - if (pid < 0) - die("unable to fork"); - if (!pid) { - execvp(pgm, (char *const*) arg); - exit(255); - } - - while (waitpid(pid, &status, 0) < 0) { - if (errno == EINTR) - continue; - return -1; - } + unsigned char *sha1; + unsigned int mode; - /* Earlier we did not check the exit status because - * diff exits non-zero if files are different, and - * we are not interested in knowing that. It was a - * mistake which made it harder to quit a diff-* - * session that uses the git-apply-patch-script as - * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF - * should also exit non-zero only when it wants to - * abort the entire diff-* session. + /* New file in the index: it might actually be different in + * the working copy. */ - if (WIFEXITED(status) && !WEXITSTATUS(status)) - return 0; - return -1; -} - -/* An external diff command takes: - * - * diff-cmd name infile1 infile1-sha1 infile1-mode \ - * infile2 infile2-sha1 infile2-mode [ rename-to ] - * - */ -static void run_external_diff(const char *pgm, - const char *name, - const char *other, - struct diff_filespec *one, - struct diff_filespec *two, - const char *xfrm_msg, - int complete_rewrite) -{ - const char *spawn_arg[10]; - struct diff_tempfile *temp = diff_temp; - int retval; - static int atexit_asked = 0; - const char *othername; - const char **arg = &spawn_arg[0]; - - othername = (other? other : name); - if (one && two) { - prepare_temp_file(name, &temp[0], one); - prepare_temp_file(othername, &temp[1], two); - if (! atexit_asked && - (temp[0].name == temp[0].tmp_path || - temp[1].name == temp[1].tmp_path)) { - atexit_asked = 1; - atexit(remove_tempfile); - } - signal(SIGINT, remove_tempfile_on_signal); - } - - if (one && two) { - *arg++ = pgm; - *arg++ = name; - *arg++ = temp[0].name; - *arg++ = temp[0].hex; - *arg++ = temp[0].mode; - *arg++ = temp[1].name; - *arg++ = temp[1].hex; - *arg++ = temp[1].mode; - if (other) { - *arg++ = other; - *arg++ = xfrm_msg; - } - } else { - *arg++ = pgm; - *arg++ = name; - } - *arg = NULL; - retval = spawn_prog(pgm, spawn_arg); - remove_tempfile(); - if (retval) { - fprintf(stderr, "external diff died, stopping at %s.\n", name); - exit(1); - } -} - -static void run_diff_cmd(const char *pgm, - const char *name, - const char *other, - struct diff_filespec *one, - struct diff_filespec *two, - const char *xfrm_msg, - int complete_rewrite) -{ - if (pgm) { - run_external_diff(pgm, name, other, one, two, xfrm_msg, - complete_rewrite); + if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) return; - } - if (one && two) - builtin_diff(name, other ? other : name, - one, two, xfrm_msg, complete_rewrite); - else - printf("* Unmerged path %s\n", name); -} -static void diff_fill_sha1_info(struct diff_filespec *one) -{ - if (DIFF_FILE_VALID(one)) { - if (!one->sha1_valid) { - struct stat st; - if (lstat(one->path, &st) < 0) - die("stat %s", one->path); - if (index_path(one->sha1, one->path, &st, 0)) - die("cannot hash %s\n", one->path); - } - } - else - memset(one->sha1, 0, 20); + diff_index_show_file(revs, "+", new, sha1, mode); } -static void run_diff(struct diff_filepair *p, struct diff_options *o) +static int show_modified(struct rev_info *revs, + struct cache_entry *old, + struct cache_entry *new, + int report_missing, + int cached, int match_missing) { - const char *pgm = external_diff(); - char msg[PATH_MAX*2+300], *xfrm_msg; - struct diff_filespec *one; - struct diff_filespec *two; - const char *name; - const char *other; - char *name_munged, *other_munged; - int complete_rewrite = 0; - int len; - - if (DIFF_PAIR_UNMERGED(p)) { - /* unmerged */ - run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0); - return; - } - - name = p->one->path; - other = (strcmp(name, p->two->path) ? p->two->path : NULL); - name_munged = quote_one(name); - other_munged = quote_one(other); - one = p->one; two = p->two; - - diff_fill_sha1_info(one); - diff_fill_sha1_info(two); - - len = 0; - switch (p->status) { - case DIFF_STATUS_COPIED: - len += snprintf(msg + len, sizeof(msg) - len, - "similarity index %d%%\n" - "copy from %s\n" - "copy to %s\n", - (int)(0.5 + p->score * 100.0/MAX_SCORE), - name_munged, other_munged); - break; - case DIFF_STATUS_RENAMED: - len += snprintf(msg + len, sizeof(msg) - len, - "similarity index %d%%\n" - "rename from %s\n" - "rename to %s\n", - (int)(0.5 + p->score * 100.0/MAX_SCORE), - name_munged, other_munged); - break; - case DIFF_STATUS_MODIFIED: - if (p->score) { - len += snprintf(msg + len, sizeof(msg) - len, - "dissimilarity index %d%%\n", - (int)(0.5 + p->score * - 100.0/MAX_SCORE)); - complete_rewrite = 1; - break; - } - /* fallthru */ - default: - /* nothing */ - ; - } - - if (memcmp(one->sha1, two->sha1, 20)) { - char one_sha1[41]; - int abbrev = o->full_index ? 40 : DEFAULT_ABBREV; - memcpy(one_sha1, sha1_to_hex(one->sha1), 41); - - len += snprintf(msg + len, sizeof(msg) - len, - "index %.*s..%.*s", - abbrev, one_sha1, abbrev, - sha1_to_hex(two->sha1)); - if (one->mode == two->mode) - len += snprintf(msg + len, sizeof(msg) - len, - " %06o", one->mode); - len += snprintf(msg + len, sizeof(msg) - len, "\n"); - } - - if (len) - msg[--len] = 0; - xfrm_msg = len ? msg : NULL; + unsigned int mode, oldmode; + unsigned char *sha1; - if (!pgm && - DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) && - (S_IFMT & one->mode) != (S_IFMT & two->mode)) { - /* a filepair that changes between file and symlink - * needs to be split into deletion and creation. - */ - struct diff_filespec *null = alloc_filespec(two->path); - run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0); - free(null); - null = alloc_filespec(one->path); - run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0); - free(null); - } - else - run_diff_cmd(pgm, name, other, one, two, xfrm_msg, - complete_rewrite); - - free(name_munged); - free(other_munged); -} - -static void run_diffstat(struct diff_filepair *p, struct diff_options *o, - struct diffstat_t *diffstat) -{ - const char *name; - const char *other; - - if (DIFF_PAIR_UNMERGED(p)) { - /* unmerged */ - builtin_diffstat(p->one->path, NULL, NULL, NULL, diffstat); - return; - } - - name = p->one->path; - other = (strcmp(name, p->two->path) ? p->two->path : NULL); - - diff_fill_sha1_info(p->one); - diff_fill_sha1_info(p->two); - - builtin_diffstat(name, other, p->one, p->two, diffstat); -} - -void diff_setup(struct diff_options *options) -{ - memset(options, 0, sizeof(*options)); - options->output_format = DIFF_FORMAT_RAW; - options->line_termination = '\n'; - options->break_opt = -1; - options->rename_limit = -1; - - options->change = diff_change; - options->add_remove = diff_addremove; -} - -int diff_setup_done(struct diff_options *options) -{ - if ((options->find_copies_harder && - options->detect_rename != DIFF_DETECT_COPY) || - (0 <= options->rename_limit && !options->detect_rename)) + if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) { + if (report_missing) + diff_index_show_file(revs, "-", old, + old->sha1, old->ce_mode); return -1; - - /* - * These cases always need recursive; we do not drop caller-supplied - * recursive bits for other formats here. - */ - if ((options->output_format == DIFF_FORMAT_PATCH) || - (options->output_format == DIFF_FORMAT_DIFFSTAT)) - options->recursive = 1; - - if (options->detect_rename && options->rename_limit < 0) - options->rename_limit = diff_rename_limit_default; - if (options->setup & DIFF_SETUP_USE_CACHE) { - if (!active_cache) - /* read-cache does not die even when it fails - * so it is safe for us to do this here. Also - * it does not smudge active_cache or active_nr - * when it fails, so we do not have to worry about - * cleaning it up ourselves either. - */ - read_cache(); } - if (options->setup & DIFF_SETUP_USE_SIZE_CACHE) - use_size_cache = 1; - if (options->abbrev <= 0 || 40 < options->abbrev) - options->abbrev = 40; /* full */ - - return 0; -} -int diff_opt_parse(struct diff_options *options, const char **av, int ac) -{ - const char *arg = av[0]; - if (!strcmp(arg, "-p") || !strcmp(arg, "-u")) - options->output_format = DIFF_FORMAT_PATCH; - else if (!strcmp(arg, "--patch-with-raw")) { - options->output_format = DIFF_FORMAT_PATCH; - options->with_raw = 1; - } - else if (!strcmp(arg, "--stat")) - options->output_format = DIFF_FORMAT_DIFFSTAT; - else if (!strcmp(arg, "--patch-with-stat")) { - options->output_format = DIFF_FORMAT_PATCH; - options->with_stat = 1; - } - else if (!strcmp(arg, "-z")) - options->line_termination = 0; - else if (!strncmp(arg, "-l", 2)) - options->rename_limit = strtoul(arg+2, NULL, 10); - else if (!strcmp(arg, "--full-index")) - options->full_index = 1; - else if (!strcmp(arg, "--name-only")) - options->output_format = DIFF_FORMAT_NAME; - else if (!strcmp(arg, "--name-status")) - options->output_format = DIFF_FORMAT_NAME_STATUS; - else if (!strcmp(arg, "-R")) - options->reverse_diff = 1; - else if (!strncmp(arg, "-S", 2)) - options->pickaxe = arg + 2; - else if (!strcmp(arg, "-s")) - options->output_format = DIFF_FORMAT_NO_OUTPUT; - else if (!strncmp(arg, "-O", 2)) - options->orderfile = arg + 2; - else if (!strncmp(arg, "--diff-filter=", 14)) - options->filter = arg + 14; - else if (!strcmp(arg, "--pickaxe-all")) - options->pickaxe_opts = DIFF_PICKAXE_ALL; - else if (!strcmp(arg, "--pickaxe-regex")) - options->pickaxe_opts = DIFF_PICKAXE_REGEX; - else if (!strncmp(arg, "-B", 2)) { - if ((options->break_opt = - diff_scoreopt_parse(arg)) == -1) - return -1; - } - else if (!strncmp(arg, "-M", 2)) { - if ((options->rename_score = - diff_scoreopt_parse(arg)) == -1) - return -1; - options->detect_rename = DIFF_DETECT_RENAME; - } - else if (!strncmp(arg, "-C", 2)) { - if ((options->rename_score = - diff_scoreopt_parse(arg)) == -1) - return -1; - options->detect_rename = DIFF_DETECT_COPY; - } - else if (!strcmp(arg, "--find-copies-harder")) - options->find_copies_harder = 1; - else if (!strcmp(arg, "--abbrev")) - options->abbrev = DEFAULT_ABBREV; - else if (!strncmp(arg, "--abbrev=", 9)) { - options->abbrev = strtoul(arg + 9, NULL, 10); - if (options->abbrev < MINIMUM_ABBREV) - options->abbrev = MINIMUM_ABBREV; - else if (40 < options->abbrev) - options->abbrev = 40; - } - else + oldmode = old->ce_mode; + if (mode == oldmode && !memcmp(sha1, old->sha1, 20) && + !revs->diffopt.find_copies_harder) return 0; - return 1; -} - -static int parse_num(const char **cp_p) -{ - unsigned long num, scale; - int ch, dot; - const char *cp = *cp_p; - - num = 0; - scale = 1; - dot = 0; - for(;;) { - ch = *cp; - if ( !dot && ch == '.' ) { - scale = 1; - dot = 1; - } else if ( ch == '%' ) { - scale = dot ? scale*100 : 100; - cp++; /* % is always at the end */ - break; - } else if ( ch >= '0' && ch <= '9' ) { - if ( scale < 100000 ) { - scale *= 10; - num = (num*10) + (ch-'0'); - } - } else { - break; - } - cp++; - } - *cp_p = cp; - - /* user says num divided by scale and we say internally that - * is MAX_SCORE * num / scale. - */ - return (num >= scale) ? MAX_SCORE : (MAX_SCORE * num / scale); -} - -int diff_scoreopt_parse(const char *opt) -{ - int opt1, opt2, cmd; - - if (*opt++ != '-') - return -1; - cmd = *opt++; - if (cmd != 'M' && cmd != 'C' && cmd != 'B') - return -1; /* that is not a -M, -C nor -B option */ - - opt1 = parse_num(&opt); - if (cmd != 'B') - opt2 = 0; - else { - if (*opt == 0) - opt2 = 0; - else if (*opt != '/') - return -1; /* we expect -B80/99 or -B80 */ - else { - opt++; - opt2 = parse_num(&opt); - } - } - if (*opt != 0) - return -1; - return opt1 | (opt2 << 16); -} - -struct diff_queue_struct diff_queued_diff; - -void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp) -{ - if (queue->alloc <= queue->nr) { - queue->alloc = alloc_nr(queue->alloc); - queue->queue = xrealloc(queue->queue, - sizeof(dp) * queue->alloc); - } - queue->queue[queue->nr++] = dp; -} - -struct diff_filepair *diff_queue(struct diff_queue_struct *queue, - struct diff_filespec *one, - struct diff_filespec *two) -{ - struct diff_filepair *dp = xmalloc(sizeof(*dp)); - dp->one = one; - dp->two = two; - dp->score = 0; - dp->status = 0; - dp->source_stays = 0; - dp->broken_pair = 0; - if (queue) - diff_q(queue, dp); - return dp; -} - -void diff_free_filepair(struct diff_filepair *p) -{ - diff_free_filespec_data(p->one); - diff_free_filespec_data(p->two); - free(p->one); - free(p->two); - free(p); -} - -/* This is different from find_unique_abbrev() in that - * it stuffs the result with dots for alignment. - */ -const char *diff_unique_abbrev(const unsigned char *sha1, int len) -{ - int abblen; - const char *abbrev; - if (len == 40) - return sha1_to_hex(sha1); - - abbrev = find_unique_abbrev(sha1, len); - if (!abbrev) - return sha1_to_hex(sha1); - abblen = strlen(abbrev); - if (abblen < 37) { - static char hex[41]; - if (len < abblen && abblen <= len + 2) - sprintf(hex, "%s%.*s", abbrev, len+3-abblen, ".."); - else - sprintf(hex, "%s...", abbrev); - return hex; - } - return sha1_to_hex(sha1); -} - -static void diff_flush_raw(struct diff_filepair *p, - int line_termination, - int inter_name_termination, - struct diff_options *options, - int output_format) -{ - int two_paths; - char status[10]; - int abbrev = options->abbrev; - const char *path_one, *path_two; - - path_one = p->one->path; - path_two = p->two->path; - if (line_termination) { - path_one = quote_one(path_one); - path_two = quote_one(path_two); - } - - if (p->score) - sprintf(status, "%c%03d", p->status, - (int)(0.5 + p->score * 100.0/MAX_SCORE)); - else { - status[0] = p->status; - status[1] = 0; - } - switch (p->status) { - case DIFF_STATUS_COPIED: - case DIFF_STATUS_RENAMED: - two_paths = 1; - break; - case DIFF_STATUS_ADDED: - case DIFF_STATUS_DELETED: - two_paths = 0; - break; - default: - two_paths = 0; - break; - } - if (output_format != DIFF_FORMAT_NAME_STATUS) { - printf(":%06o %06o %s ", - p->one->mode, p->two->mode, - diff_unique_abbrev(p->one->sha1, abbrev)); - printf("%s ", - diff_unique_abbrev(p->two->sha1, abbrev)); - } - printf("%s%c%s", status, inter_name_termination, path_one); - if (two_paths) - printf("%c%s", inter_name_termination, path_two); - putchar(line_termination); - if (path_one != p->one->path) - free((void*)path_one); - if (path_two != p->two->path) - free((void*)path_two); -} - -static void diff_flush_name(struct diff_filepair *p, - int inter_name_termination, - int line_termination) -{ - char *path = p->two->path; - - if (line_termination) - path = quote_one(p->two->path); - else - path = p->two->path; - printf("%s%c", path, line_termination); - if (p->two->path != path) - free(path); -} - -int diff_unmodified_pair(struct diff_filepair *p) -{ - /* This function is written stricter than necessary to support - * the currently implemented transformers, but the idea is to - * let transformers to produce diff_filepairs any way they want, - * and filter and clean them up here before producing the output. - */ - struct diff_filespec *one, *two; - if (DIFF_PAIR_UNMERGED(p)) - return 0; /* unmerged is interesting */ + mode = ntohl(mode); + oldmode = ntohl(oldmode); - one = p->one; - two = p->two; - - /* deletion, addition, mode or type change - * and rename are all interesting. - */ - if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) || - DIFF_PAIR_MODE_CHANGED(p) || - strcmp(one->path, two->path)) - return 0; - - /* both are valid and point at the same path. that is, we are - * dealing with a change. - */ - if (one->sha1_valid && two->sha1_valid && - !memcmp(one->sha1, two->sha1, sizeof(one->sha1))) - return 1; /* no change */ - if (!one->sha1_valid && !two->sha1_valid) - return 1; /* both look at the same file on the filesystem. */ + diff_change(&revs->diffopt, oldmode, mode, + old->sha1, sha1, old->name, NULL); return 0; } -static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o) +static int diff_cache(struct rev_info *revs, + struct cache_entry **ac, int entries, + const char **pathspec, + int cached, int match_missing) { - if (diff_unmodified_pair(p)) - return; + while (entries) { + struct cache_entry *ce = *ac; + int same = (entries > 1) && ce_same_name(ce, ac[1]); - if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || - (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) - return; /* no tree diffs in patch format */ + if (!ce_path_match(ce, pathspec)) + goto skip_entry; - run_diff(p, o); -} - -static void diff_flush_stat(struct diff_filepair *p, struct diff_options *o, - struct diffstat_t *diffstat) -{ - if (diff_unmodified_pair(p)) - return; - - if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || - (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) - return; /* no tree diffs in patch format */ - - run_diffstat(p, o, diffstat); -} - -int diff_queue_is_empty(void) -{ - struct diff_queue_struct *q = &diff_queued_diff; - int i; - for (i = 0; i < q->nr; i++) - if (!diff_unmodified_pair(q->queue[i])) - return 0; - return 1; -} - -#if DIFF_DEBUG -void diff_debug_filespec(struct diff_filespec *s, int x, const char *one) -{ - fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n", - x, one ? one : "", - s->path, - DIFF_FILE_VALID(s) ? "valid" : "invalid", - s->mode, - s->sha1_valid ? sha1_to_hex(s->sha1) : ""); - fprintf(stderr, "queue[%d] %s size %lu flags %d\n", - x, one ? one : "", - s->size, s->xfrm_flags); -} - -void diff_debug_filepair(const struct diff_filepair *p, int i) -{ - diff_debug_filespec(p->one, i, "one"); - diff_debug_filespec(p->two, i, "two"); - fprintf(stderr, "score %d, status %c stays %d broken %d\n", - p->score, p->status ? p->status : '?', - p->source_stays, p->broken_pair); -} - -void diff_debug_queue(const char *msg, struct diff_queue_struct *q) -{ - int i; - if (msg) - fprintf(stderr, "%s\n", msg); - fprintf(stderr, "q->nr = %d\n", q->nr); - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - diff_debug_filepair(p, i); - } -} -#endif - -static void diff_resolve_rename_copy(void) -{ - int i, j; - struct diff_filepair *p, *pp; - struct diff_queue_struct *q = &diff_queued_diff; - - diff_debug_queue("resolve-rename-copy", q); - - for (i = 0; i < q->nr; i++) { - p = q->queue[i]; - p->status = 0; /* undecided */ - if (DIFF_PAIR_UNMERGED(p)) - p->status = DIFF_STATUS_UNMERGED; - else if (!DIFF_FILE_VALID(p->one)) - p->status = DIFF_STATUS_ADDED; - else if (!DIFF_FILE_VALID(p->two)) - p->status = DIFF_STATUS_DELETED; - else if (DIFF_PAIR_TYPE_CHANGED(p)) - p->status = DIFF_STATUS_TYPE_CHANGED; - - /* from this point on, we are dealing with a pair - * whose both sides are valid and of the same type, i.e. - * either in-place edit or rename/copy edit. - */ - else if (DIFF_PAIR_RENAME(p)) { - if (p->source_stays) { - p->status = DIFF_STATUS_COPIED; - continue; + switch (ce_stage(ce)) { + case 0: + /* No stage 1 entry? That means it's a new file */ + if (!same) { + show_new_file(revs, ce, cached, match_missing); + break; } - /* See if there is some other filepair that - * copies from the same source as us. If so - * we are a copy. Otherwise we are either a - * copy if the path stays, or a rename if it - * does not, but we already handled "stays" case. + /* Show difference between old and new */ + show_modified(revs,ac[1], ce, 1, + cached, match_missing); + break; + case 1: + /* No stage 3 (merge) entry? + * That means it's been deleted. */ - for (j = i + 1; j < q->nr; j++) { - pp = q->queue[j]; - if (strcmp(pp->one->path, p->one->path)) - continue; /* not us */ - if (!DIFF_PAIR_RENAME(pp)) - continue; /* not a rename/copy */ - /* pp is a rename/copy from the same source */ - p->status = DIFF_STATUS_COPIED; + if (!same) { + diff_index_show_file(revs, "-", ce, + ce->sha1, ce->ce_mode); break; } - if (!p->status) - p->status = DIFF_STATUS_RENAMED; - } - else if (memcmp(p->one->sha1, p->two->sha1, 20) || - p->one->mode != p->two->mode) - p->status = DIFF_STATUS_MODIFIED; - else { - /* This is a "no-change" entry and should not - * happen anymore, but prepare for broken callers. + /* We come here with ce pointing at stage 1 + * (original tree) and ac[1] pointing at stage + * 3 (unmerged). show-modified with + * report-missing set to false does not say the + * file is deleted but reports true if work + * tree does not have it, in which case we + * fall through to report the unmerged state. + * Otherwise, we show the differences between + * the original tree and the work tree. */ - error("feeding unmodified %s to diffcore", - p->one->path); - p->status = DIFF_STATUS_UNKNOWN; - } - } - diff_debug_queue("resolve-rename-copy done", q); -} - -static void flush_one_pair(struct diff_filepair *p, - int diff_output_format, - struct diff_options *options, - struct diffstat_t *diffstat) -{ - int inter_name_termination = '\t'; - int line_termination = options->line_termination; - if (!line_termination) - inter_name_termination = 0; - - switch (p->status) { - case DIFF_STATUS_UNKNOWN: - break; - case 0: - die("internal error in diff-resolve-rename-copy"); - break; - default: - switch (diff_output_format) { - case DIFF_FORMAT_DIFFSTAT: - diff_flush_stat(p, options, diffstat); - break; - case DIFF_FORMAT_PATCH: - diff_flush_patch(p, options); - break; - case DIFF_FORMAT_RAW: - case DIFF_FORMAT_NAME_STATUS: - diff_flush_raw(p, line_termination, - inter_name_termination, - options, diff_output_format); - break; - case DIFF_FORMAT_NAME: - diff_flush_name(p, - inter_name_termination, - line_termination); - break; - case DIFF_FORMAT_NO_OUTPUT: + if (!cached && + !show_modified(revs, ce, ac[1], 0, + cached, match_missing)) + break; + /* fallthru */ + case 3: + diff_unmerge(&revs->diffopt, ce->name); break; - } - } -} -void diff_flush(struct diff_options *options) -{ - struct diff_queue_struct *q = &diff_queued_diff; - int i; - int diff_output_format = options->output_format; - struct diffstat_t *diffstat = NULL; - - if (diff_output_format == DIFF_FORMAT_DIFFSTAT || options->with_stat) { - diffstat = xcalloc(sizeof (struct diffstat_t), 1); - diffstat->xm.consume = diffstat_consume; - } - - if (options->with_raw) { - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - flush_one_pair(p, DIFF_FORMAT_RAW, options, NULL); - } - putchar(options->line_termination); - } - if (options->with_stat) { - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - flush_one_pair(p, DIFF_FORMAT_DIFFSTAT, options, - diffstat); + default: + die("impossible cache entry stage"); } - show_stats(diffstat); - free(diffstat); - diffstat = NULL; - putchar(options->line_termination); - } - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - flush_one_pair(p, diff_output_format, options, diffstat); - diff_free_filepair(p); - } - if (diffstat) { - show_stats(diffstat); - free(diffstat); +skip_entry: + /* + * Ignore all the different stages for this file, + * we've handled the relevant cases now. + */ + do { + ac++; + entries--; + } while (entries && ce_same_name(ce, ac[0])); } - - free(q->queue); - q->queue = NULL; - q->nr = q->alloc = 0; + return 0; } -static void diffcore_apply_filter(const char *filter) +/* + * This turns all merge entries into "stage 3". That guarantees that + * when we read in the new tree (into "stage 1"), we won't lose sight + * of the fact that we had unmerged entries. + */ +static void mark_merge_entries(void) { int i; - struct diff_queue_struct *q = &diff_queued_diff; - struct diff_queue_struct outq; - outq.queue = NULL; - outq.nr = outq.alloc = 0; - - if (!filter) - return; - - if (strchr(filter, DIFF_STATUS_FILTER_AON)) { - int found; - for (i = found = 0; !found && i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - if (((p->status == DIFF_STATUS_MODIFIED) && - ((p->score && - strchr(filter, DIFF_STATUS_FILTER_BROKEN)) || - (!p->score && - strchr(filter, DIFF_STATUS_MODIFIED)))) || - ((p->status != DIFF_STATUS_MODIFIED) && - strchr(filter, p->status))) - found++; - } - if (found) - return; - - /* otherwise we will clear the whole queue - * by copying the empty outq at the end of this - * function, but first clear the current entries - * in the queue. - */ - for (i = 0; i < q->nr; i++) - diff_free_filepair(q->queue[i]); - } - else { - /* Only the matching ones */ - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - - if (((p->status == DIFF_STATUS_MODIFIED) && - ((p->score && - strchr(filter, DIFF_STATUS_FILTER_BROKEN)) || - (!p->score && - strchr(filter, DIFF_STATUS_MODIFIED)))) || - ((p->status != DIFF_STATUS_MODIFIED) && - strchr(filter, p->status))) - diff_q(&outq, p); - else - diff_free_filepair(p); - } + for (i = 0; i < active_nr; i++) { + struct cache_entry *ce = active_cache[i]; + if (!ce_stage(ce)) + continue; + ce->ce_flags |= htons(CE_STAGEMASK); } - free(q->queue); - *q = outq; -} - -void diffcore_std(struct diff_options *options) -{ - if (options->break_opt != -1) - diffcore_break(options->break_opt); - if (options->detect_rename) - diffcore_rename(options); - if (options->break_opt != -1) - diffcore_merge_broken(); - if (options->pickaxe) - diffcore_pickaxe(options->pickaxe, options->pickaxe_opts); - if (options->orderfile) - diffcore_order(options->orderfile); - diff_resolve_rename_copy(); - diffcore_apply_filter(options->filter); -} - - -void diffcore_std_no_resolve(struct diff_options *options) -{ - if (options->pickaxe) - diffcore_pickaxe(options->pickaxe, options->pickaxe_opts); - if (options->orderfile) - diffcore_order(options->orderfile); - diffcore_apply_filter(options->filter); } -void diff_addremove(struct diff_options *options, - int addremove, unsigned mode, - const unsigned char *sha1, - const char *base, const char *path) +int run_diff_index(struct rev_info *revs, int cached) { - char concatpath[PATH_MAX]; - struct diff_filespec *one, *two; + int ret; + struct object *ent; + struct tree *tree; + const char *tree_name; + int match_missing = 0; - /* This may look odd, but it is a preparation for - * feeding "there are unchanged files which should - * not produce diffs, but when you are doing copy - * detection you would need them, so here they are" - * entries to the diff-core. They will be prefixed - * with something like '=' or '*' (I haven't decided - * which but should not make any difference). - * Feeding the same new and old to diff_change() - * also has the same effect. - * Before the final output happens, they are pruned after - * merged into rename/copy pairs as appropriate. + /* + * Backward compatibility wart - "diff-index -m" does + * not mean "do not ignore merges", but totally different. */ - if (options->reverse_diff) - addremove = (addremove == '+' ? '-' : - addremove == '-' ? '+' : addremove); - - if (!path) path = ""; - sprintf(concatpath, "%s%s", base, path); - one = alloc_filespec(concatpath); - two = alloc_filespec(concatpath); - - if (addremove != '+') - fill_filespec(one, sha1, mode); - if (addremove != '-') - fill_filespec(two, sha1, mode); + if (!revs->ignore_merges) + match_missing = 1; - diff_queue(&diff_queued_diff, one, two); -} - -void diff_change(struct diff_options *options, - unsigned old_mode, unsigned new_mode, - const unsigned char *old_sha1, - const unsigned char *new_sha1, - const char *base, const char *path) -{ - char concatpath[PATH_MAX]; - struct diff_filespec *one, *two; - - if (options->reverse_diff) { - unsigned tmp; - const unsigned char *tmp_c; - tmp = old_mode; old_mode = new_mode; new_mode = tmp; - tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c; + if (read_cache() < 0) { + perror("read_cache"); + return -1; } - if (!path) path = ""; - sprintf(concatpath, "%s%s", base, path); - one = alloc_filespec(concatpath); - two = alloc_filespec(concatpath); - fill_filespec(one, old_sha1, old_mode); - fill_filespec(two, new_sha1, new_mode); - - diff_queue(&diff_queued_diff, one, two); -} - -void diff_unmerge(struct diff_options *options, - const char *path) -{ - struct diff_filespec *one, *two; - one = alloc_filespec(path); - two = alloc_filespec(path); - diff_queue(&diff_queued_diff, one, two); + mark_merge_entries(); + + ent = revs->pending_objects->item; + tree_name = revs->pending_objects->name; + tree = parse_tree_indirect(ent->sha1); + if (!tree) + return error("bad tree object %s", tree_name); + if (read_tree(tree, 1, revs->prune_data)) + return error("unable to read tree object %s", tree_name); + ret = diff_cache(revs, active_cache, active_nr, revs->prune_data, + cached, match_missing); + diffcore_std(&revs->diffopt); + diff_flush(&revs->diffopt); + return ret; } diff --git a/diff.c b/diff.c new file mode 100644 index 0000000000..13b216f273 --- /dev/null +++ b/diff.c @@ -0,0 +1,1795 @@ +/* + * Copyright (C) 2005 Junio C Hamano + */ +#include <sys/types.h> +#include <sys/wait.h> +#include <signal.h> +#include "cache.h" +#include "quote.h" +#include "diff.h" +#include "diffcore.h" +#include "xdiff-interface.h" + +static int use_size_cache; + +int diff_rename_limit_default = -1; + +int git_diff_config(const char *var, const char *value) +{ + if (!strcmp(var, "diff.renamelimit")) { + diff_rename_limit_default = git_config_int(var, value); + return 0; + } + + return git_default_config(var, value); +} + +static char *quote_one(const char *str) +{ + int needlen; + char *xp; + + if (!str) + return NULL; + needlen = quote_c_style(str, NULL, NULL, 0); + if (!needlen) + return strdup(str); + xp = xmalloc(needlen + 1); + quote_c_style(str, xp, NULL, 0); + return xp; +} + +static char *quote_two(const char *one, const char *two) +{ + int need_one = quote_c_style(one, NULL, NULL, 1); + int need_two = quote_c_style(two, NULL, NULL, 1); + char *xp; + + if (need_one + need_two) { + if (!need_one) need_one = strlen(one); + if (!need_two) need_one = strlen(two); + + xp = xmalloc(need_one + need_two + 3); + xp[0] = '"'; + quote_c_style(one, xp + 1, NULL, 1); + quote_c_style(two, xp + need_one + 1, NULL, 1); + strcpy(xp + need_one + need_two + 1, "\""); + return xp; + } + need_one = strlen(one); + need_two = strlen(two); + xp = xmalloc(need_one + need_two + 1); + strcpy(xp, one); + strcpy(xp + need_one, two); + return xp; +} + +static const char *external_diff(void) +{ + static const char *external_diff_cmd = NULL; + static int done_preparing = 0; + + if (done_preparing) + return external_diff_cmd; + external_diff_cmd = getenv("GIT_EXTERNAL_DIFF"); + done_preparing = 1; + return external_diff_cmd; +} + +#define TEMPFILE_PATH_LEN 50 + +static struct diff_tempfile { + const char *name; /* filename external diff should read from */ + char hex[41]; + char mode[10]; + char tmp_path[TEMPFILE_PATH_LEN]; +} diff_temp[2]; + +static int count_lines(const char *data, int size) +{ + int count, ch, completely_empty = 1, nl_just_seen = 0; + count = 0; + while (0 < size--) { + ch = *data++; + if (ch == '\n') { + count++; + nl_just_seen = 1; + completely_empty = 0; + } + else { + nl_just_seen = 0; + completely_empty = 0; + } + } + if (completely_empty) + return 0; + if (!nl_just_seen) + count++; /* no trailing newline */ + return count; +} + +static void print_line_count(int count) +{ + switch (count) { + case 0: + printf("0,0"); + break; + case 1: + printf("1"); + break; + default: + printf("1,%d", count); + break; + } +} + +static void copy_file(int prefix, const char *data, int size) +{ + int ch, nl_just_seen = 1; + while (0 < size--) { + ch = *data++; + if (nl_just_seen) + putchar(prefix); + putchar(ch); + if (ch == '\n') + nl_just_seen = 1; + else + nl_just_seen = 0; + } + if (!nl_just_seen) + printf("\n\\ No newline at end of file\n"); +} + +static void emit_rewrite_diff(const char *name_a, + const char *name_b, + struct diff_filespec *one, + struct diff_filespec *two) +{ + int lc_a, lc_b; + diff_populate_filespec(one, 0); + diff_populate_filespec(two, 0); + lc_a = count_lines(one->data, one->size); + lc_b = count_lines(two->data, two->size); + printf("--- %s\n+++ %s\n@@ -", name_a, name_b); + print_line_count(lc_a); + printf(" +"); + print_line_count(lc_b); + printf(" @@\n"); + if (lc_a) + copy_file('-', one->data, one->size); + if (lc_b) + copy_file('+', two->data, two->size); +} + +static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) +{ + if (!DIFF_FILE_VALID(one)) { + mf->ptr = ""; /* does not matter */ + mf->size = 0; + return 0; + } + else if (diff_populate_filespec(one, 0)) + return -1; + mf->ptr = one->data; + mf->size = one->size; + return 0; +} + +struct emit_callback { + const char **label_path; +}; + +static int fn_out(void *priv, mmbuffer_t *mb, int nbuf) +{ + int i; + struct emit_callback *ecbdata = priv; + + if (ecbdata->label_path[0]) { + printf("--- %s\n", ecbdata->label_path[0]); + printf("+++ %s\n", ecbdata->label_path[1]); + ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; + } + for (i = 0; i < nbuf; i++) + if (!fwrite(mb[i].ptr, mb[i].size, 1, stdout)) + return -1; + return 0; +} + +static char *pprint_rename(const char *a, const char *b) +{ + const char *old = a; + const char *new = b; + char *name = NULL; + int pfx_length, sfx_length; + int len_a = strlen(a); + int len_b = strlen(b); + + /* Find common prefix */ + pfx_length = 0; + while (*old && *new && *old == *new) { + if (*old == '/') + pfx_length = old - a + 1; + old++; + new++; + } + + /* Find common suffix */ + old = a + len_a; + new = b + len_b; + sfx_length = 0; + while (a <= old && b <= new && *old == *new) { + if (*old == '/') + sfx_length = len_a - (old - a); + old--; + new--; + } + + /* + * pfx{mid-a => mid-b}sfx + * {pfx-a => pfx-b}sfx + * pfx{sfx-a => sfx-b} + * name-a => name-b + */ + if (pfx_length + sfx_length) { + name = xmalloc(len_a + len_b - pfx_length - sfx_length + 7); + sprintf(name, "%.*s{%.*s => %.*s}%s", + pfx_length, a, + len_a - pfx_length - sfx_length, a + pfx_length, + len_b - pfx_length - sfx_length, b + pfx_length, + a + len_a - sfx_length); + } + else { + name = xmalloc(len_a + len_b + 5); + sprintf(name, "%s => %s", a, b); + } + return name; +} + +struct diffstat_t { + struct xdiff_emit_state xm; + + int nr; + int alloc; + struct diffstat_file { + char *name; + unsigned is_unmerged:1; + unsigned is_binary:1; + unsigned is_renamed:1; + unsigned int added, deleted; + } **files; +}; + +static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat, + const char *name_a, + const char *name_b) +{ + struct diffstat_file *x; + x = xcalloc(sizeof (*x), 1); + if (diffstat->nr == diffstat->alloc) { + diffstat->alloc = alloc_nr(diffstat->alloc); + diffstat->files = xrealloc(diffstat->files, + diffstat->alloc * sizeof(x)); + } + diffstat->files[diffstat->nr++] = x; + if (name_b) { + x->name = pprint_rename(name_a, name_b); + x->is_renamed = 1; + } + else + x->name = strdup(name_a); + return x; +} + +static void diffstat_consume(void *priv, char *line, unsigned long len) +{ + struct diffstat_t *diffstat = priv; + struct diffstat_file *x = diffstat->files[diffstat->nr - 1]; + + if (line[0] == '+') + x->added++; + else if (line[0] == '-') + x->deleted++; +} + +static const char pluses[] = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"; +static const char minuses[]= "----------------------------------------------------------------------"; + +static void show_stats(struct diffstat_t* data) +{ + char *prefix = ""; + int i, len, add, del, total, adds = 0, dels = 0; + int max, max_change = 0, max_len = 0; + int total_files = data->nr; + + if (data->nr == 0) + return; + + for (i = 0; i < data->nr; i++) { + struct diffstat_file *file = data->files[i]; + + len = strlen(file->name); + if (max_len < len) + max_len = len; + + if (file->is_binary || file->is_unmerged) + continue; + if (max_change < file->added + file->deleted) + max_change = file->added + file->deleted; + } + + for (i = 0; i < data->nr; i++) { + char *name = data->files[i]->name; + int added = data->files[i]->added; + int deleted = data->files[i]->deleted; + + if (0 < (len = quote_c_style(name, NULL, NULL, 0))) { + char *qname = xmalloc(len + 1); + quote_c_style(name, qname, NULL, 0); + free(name); + data->files[i]->name = name = qname; + } + + /* + * "scale" the filename + */ + len = strlen(name); + max = max_len; + if (max > 50) + max = 50; + if (len > max) { + char *slash; + prefix = "..."; + max -= 3; + name += len - max; + slash = strchr(name, '/'); + if (slash) + name = slash; + } + len = max; + + /* + * scale the add/delete + */ + max = max_change; + if (max + len > 70) + max = 70 - len; + + if (data->files[i]->is_binary) { + printf(" %s%-*s | Bin\n", prefix, len, name); + goto free_diffstat_file; + } + else if (data->files[i]->is_unmerged) { + printf(" %s%-*s | Unmerged\n", prefix, len, name); + goto free_diffstat_file; + } + else if (!data->files[i]->is_renamed && + (added + deleted == 0)) { + total_files--; + goto free_diffstat_file; + } + + add = added; + del = deleted; + total = add + del; + adds += add; + dels += del; + + if (max_change > 0) { + total = (total * max + max_change / 2) / max_change; + add = (add * max + max_change / 2) / max_change; + del = total - add; + } + printf(" %s%-*s |%5d %.*s%.*s\n", prefix, + len, name, added + deleted, + add, pluses, del, minuses); + free_diffstat_file: + free(data->files[i]->name); + free(data->files[i]); + } + free(data->files); + printf(" %d files changed, %d insertions(+), %d deletions(-)\n", + total_files, adds, dels); +} + +#define FIRST_FEW_BYTES 8000 +static int mmfile_is_binary(mmfile_t *mf) +{ + long sz = mf->size; + if (FIRST_FEW_BYTES < sz) + sz = FIRST_FEW_BYTES; + if (memchr(mf->ptr, 0, sz)) + return 1; + return 0; +} + +static void builtin_diff(const char *name_a, + const char *name_b, + struct diff_filespec *one, + struct diff_filespec *two, + const char *xfrm_msg, + int complete_rewrite) +{ + mmfile_t mf1, mf2; + const char *lbl[2]; + char *a_one, *b_two; + + a_one = quote_two("a/", name_a); + b_two = quote_two("b/", name_b); + lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null"; + lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null"; + printf("diff --git %s %s\n", a_one, b_two); + if (lbl[0][0] == '/') { + /* /dev/null */ + printf("new file mode %06o\n", two->mode); + if (xfrm_msg && xfrm_msg[0]) + puts(xfrm_msg); + } + else if (lbl[1][0] == '/') { + printf("deleted file mode %06o\n", one->mode); + if (xfrm_msg && xfrm_msg[0]) + puts(xfrm_msg); + } + else { + if (one->mode != two->mode) { + printf("old mode %06o\n", one->mode); + printf("new mode %06o\n", two->mode); + } + if (xfrm_msg && xfrm_msg[0]) + puts(xfrm_msg); + /* + * we do not run diff between different kind + * of objects. + */ + if ((one->mode ^ two->mode) & S_IFMT) + goto free_ab_and_return; + if (complete_rewrite) { + emit_rewrite_diff(name_a, name_b, one, two); + goto free_ab_and_return; + } + } + + if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) + die("unable to read files to diff"); + + if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) + printf("Binary files %s and %s differ\n", lbl[0], lbl[1]); + else { + /* Crazy xdl interfaces.. */ + const char *diffopts = getenv("GIT_DIFF_OPTS"); + xpparam_t xpp; + xdemitconf_t xecfg; + xdemitcb_t ecb; + struct emit_callback ecbdata; + + ecbdata.label_path = lbl; + xpp.flags = XDF_NEED_MINIMAL; + xecfg.ctxlen = 3; + xecfg.flags = XDL_EMIT_FUNCNAMES; + if (!diffopts) + ; + else if (!strncmp(diffopts, "--unified=", 10)) + xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10); + else if (!strncmp(diffopts, "-u", 2)) + xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10); + ecb.outf = fn_out; + ecb.priv = &ecbdata; + xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + } + + free_ab_and_return: + free(a_one); + free(b_two); + return; +} + +static void builtin_diffstat(const char *name_a, const char *name_b, + struct diff_filespec *one, + struct diff_filespec *two, + struct diffstat_t *diffstat) +{ + mmfile_t mf1, mf2; + struct diffstat_file *data; + + data = diffstat_add(diffstat, name_a, name_b); + + if (!one || !two) { + data->is_unmerged = 1; + return; + } + + if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) + die("unable to read files to diff"); + + if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) + data->is_binary = 1; + else { + /* Crazy xdl interfaces.. */ + xpparam_t xpp; + xdemitconf_t xecfg; + xdemitcb_t ecb; + + xpp.flags = XDF_NEED_MINIMAL; + xecfg.ctxlen = 0; + xecfg.flags = 0; + ecb.outf = xdiff_outf; + ecb.priv = diffstat; + xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + } +} + +struct diff_filespec *alloc_filespec(const char *path) +{ + int namelen = strlen(path); + struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1); + + memset(spec, 0, sizeof(*spec)); + spec->path = (char *)(spec + 1); + memcpy(spec->path, path, namelen+1); + return spec; +} + +void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1, + unsigned short mode) +{ + if (mode) { + spec->mode = canon_mode(mode); + memcpy(spec->sha1, sha1, 20); + spec->sha1_valid = !!memcmp(sha1, null_sha1, 20); + } +} + +/* + * Given a name and sha1 pair, if the dircache tells us the file in + * the work tree has that object contents, return true, so that + * prepare_temp_file() does not have to inflate and extract. + */ +static int work_tree_matches(const char *name, const unsigned char *sha1) +{ + struct cache_entry *ce; + struct stat st; + int pos, len; + + /* We do not read the cache ourselves here, because the + * benchmark with my previous version that always reads cache + * shows that it makes things worse for diff-tree comparing + * two linux-2.6 kernel trees in an already checked out work + * tree. This is because most diff-tree comparisons deal with + * only a small number of files, while reading the cache is + * expensive for a large project, and its cost outweighs the + * savings we get by not inflating the object to a temporary + * file. Practically, this code only helps when we are used + * by diff-cache --cached, which does read the cache before + * calling us. + */ + if (!active_cache) + return 0; + + len = strlen(name); + pos = cache_name_pos(name, len); + if (pos < 0) + return 0; + ce = active_cache[pos]; + if ((lstat(name, &st) < 0) || + !S_ISREG(st.st_mode) || /* careful! */ + ce_match_stat(ce, &st, 0) || + memcmp(sha1, ce->sha1, 20)) + return 0; + /* we return 1 only when we can stat, it is a regular file, + * stat information matches, and sha1 recorded in the cache + * matches. I.e. we know the file in the work tree really is + * the same as the <name, sha1> pair. + */ + return 1; +} + +static struct sha1_size_cache { + unsigned char sha1[20]; + unsigned long size; +} **sha1_size_cache; +static int sha1_size_cache_nr, sha1_size_cache_alloc; + +static struct sha1_size_cache *locate_size_cache(unsigned char *sha1, + int find_only, + unsigned long size) +{ + int first, last; + struct sha1_size_cache *e; + + first = 0; + last = sha1_size_cache_nr; + while (last > first) { + int cmp, next = (last + first) >> 1; + e = sha1_size_cache[next]; + cmp = memcmp(e->sha1, sha1, 20); + if (!cmp) + return e; + if (cmp < 0) { + last = next; + continue; + } + first = next+1; + } + /* not found */ + if (find_only) + return NULL; + /* insert to make it at "first" */ + if (sha1_size_cache_alloc <= sha1_size_cache_nr) { + sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc); + sha1_size_cache = xrealloc(sha1_size_cache, + sha1_size_cache_alloc * + sizeof(*sha1_size_cache)); + } + sha1_size_cache_nr++; + if (first < sha1_size_cache_nr) + memmove(sha1_size_cache + first + 1, sha1_size_cache + first, + (sha1_size_cache_nr - first - 1) * + sizeof(*sha1_size_cache)); + e = xmalloc(sizeof(struct sha1_size_cache)); + sha1_size_cache[first] = e; + memcpy(e->sha1, sha1, 20); + e->size = size; + return e; +} + +/* + * While doing rename detection and pickaxe operation, we may need to + * grab the data for the blob (or file) for our own in-core comparison. + * diff_filespec has data and size fields for this purpose. + */ +int diff_populate_filespec(struct diff_filespec *s, int size_only) +{ + int err = 0; + if (!DIFF_FILE_VALID(s)) + die("internal error: asking to populate invalid file."); + if (S_ISDIR(s->mode)) + return -1; + + if (!use_size_cache) + size_only = 0; + + if (s->data) + return err; + if (!s->sha1_valid || + work_tree_matches(s->path, s->sha1)) { + struct stat st; + int fd; + if (lstat(s->path, &st) < 0) { + if (errno == ENOENT) { + err_empty: + err = -1; + empty: + s->data = ""; + s->size = 0; + return err; + } + } + s->size = st.st_size; + if (!s->size) + goto empty; + if (size_only) + return 0; + if (S_ISLNK(st.st_mode)) { + int ret; + s->data = xmalloc(s->size); + s->should_free = 1; + ret = readlink(s->path, s->data, s->size); + if (ret < 0) { + free(s->data); + goto err_empty; + } + return 0; + } + fd = open(s->path, O_RDONLY); + if (fd < 0) + goto err_empty; + s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (s->data == MAP_FAILED) + goto err_empty; + s->should_munmap = 1; + } + else { + char type[20]; + struct sha1_size_cache *e; + + if (size_only) { + e = locate_size_cache(s->sha1, 1, 0); + if (e) { + s->size = e->size; + return 0; + } + if (!sha1_object_info(s->sha1, type, &s->size)) + locate_size_cache(s->sha1, 0, s->size); + } + else { + s->data = read_sha1_file(s->sha1, type, &s->size); + s->should_free = 1; + } + } + return 0; +} + +void diff_free_filespec_data(struct diff_filespec *s) +{ + if (s->should_free) + free(s->data); + else if (s->should_munmap) + munmap(s->data, s->size); + s->should_free = s->should_munmap = 0; + s->data = NULL; + free(s->cnt_data); + s->cnt_data = NULL; +} + +static void prep_temp_blob(struct diff_tempfile *temp, + void *blob, + unsigned long size, + const unsigned char *sha1, + int mode) +{ + int fd; + + fd = git_mkstemp(temp->tmp_path, TEMPFILE_PATH_LEN, ".diff_XXXXXX"); + if (fd < 0) + die("unable to create temp-file"); + if (write(fd, blob, size) != size) + die("unable to write temp-file"); + close(fd); + temp->name = temp->tmp_path; + strcpy(temp->hex, sha1_to_hex(sha1)); + temp->hex[40] = 0; + sprintf(temp->mode, "%06o", mode); +} + +static void prepare_temp_file(const char *name, + struct diff_tempfile *temp, + struct diff_filespec *one) +{ + if (!DIFF_FILE_VALID(one)) { + not_a_valid_file: + /* A '-' entry produces this for file-2, and + * a '+' entry produces this for file-1. + */ + temp->name = "/dev/null"; + strcpy(temp->hex, "."); + strcpy(temp->mode, "."); + return; + } + + if (!one->sha1_valid || + work_tree_matches(name, one->sha1)) { + struct stat st; + if (lstat(name, &st) < 0) { + if (errno == ENOENT) + goto not_a_valid_file; + die("stat(%s): %s", name, strerror(errno)); + } + if (S_ISLNK(st.st_mode)) { + int ret; + char buf[PATH_MAX + 1]; /* ought to be SYMLINK_MAX */ + if (sizeof(buf) <= st.st_size) + die("symlink too long: %s", name); + ret = readlink(name, buf, st.st_size); + if (ret < 0) + die("readlink(%s)", name); + prep_temp_blob(temp, buf, st.st_size, + (one->sha1_valid ? + one->sha1 : null_sha1), + (one->sha1_valid ? + one->mode : S_IFLNK)); + } + else { + /* we can borrow from the file in the work tree */ + temp->name = name; + if (!one->sha1_valid) + strcpy(temp->hex, sha1_to_hex(null_sha1)); + else + strcpy(temp->hex, sha1_to_hex(one->sha1)); + /* Even though we may sometimes borrow the + * contents from the work tree, we always want + * one->mode. mode is trustworthy even when + * !(one->sha1_valid), as long as + * DIFF_FILE_VALID(one). + */ + sprintf(temp->mode, "%06o", one->mode); + } + return; + } + else { + if (diff_populate_filespec(one, 0)) + die("cannot read data blob for %s", one->path); + prep_temp_blob(temp, one->data, one->size, + one->sha1, one->mode); + } +} + +static void remove_tempfile(void) +{ + int i; + + for (i = 0; i < 2; i++) + if (diff_temp[i].name == diff_temp[i].tmp_path) { + unlink(diff_temp[i].name); + diff_temp[i].name = NULL; + } +} + +static void remove_tempfile_on_signal(int signo) +{ + remove_tempfile(); + signal(SIGINT, SIG_DFL); + raise(signo); +} + +static int spawn_prog(const char *pgm, const char **arg) +{ + pid_t pid; + int status; + + fflush(NULL); + pid = fork(); + if (pid < 0) + die("unable to fork"); + if (!pid) { + execvp(pgm, (char *const*) arg); + exit(255); + } + + while (waitpid(pid, &status, 0) < 0) { + if (errno == EINTR) + continue; + return -1; + } + + /* Earlier we did not check the exit status because + * diff exits non-zero if files are different, and + * we are not interested in knowing that. It was a + * mistake which made it harder to quit a diff-* + * session that uses the git-apply-patch-script as + * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF + * should also exit non-zero only when it wants to + * abort the entire diff-* session. + */ + if (WIFEXITED(status) && !WEXITSTATUS(status)) + return 0; + return -1; +} + +/* An external diff command takes: + * + * diff-cmd name infile1 infile1-sha1 infile1-mode \ + * infile2 infile2-sha1 infile2-mode [ rename-to ] + * + */ +static void run_external_diff(const char *pgm, + const char *name, + const char *other, + struct diff_filespec *one, + struct diff_filespec *two, + const char *xfrm_msg, + int complete_rewrite) +{ + const char *spawn_arg[10]; + struct diff_tempfile *temp = diff_temp; + int retval; + static int atexit_asked = 0; + const char *othername; + const char **arg = &spawn_arg[0]; + + othername = (other? other : name); + if (one && two) { + prepare_temp_file(name, &temp[0], one); + prepare_temp_file(othername, &temp[1], two); + if (! atexit_asked && + (temp[0].name == temp[0].tmp_path || + temp[1].name == temp[1].tmp_path)) { + atexit_asked = 1; + atexit(remove_tempfile); + } + signal(SIGINT, remove_tempfile_on_signal); + } + + if (one && two) { + *arg++ = pgm; + *arg++ = name; + *arg++ = temp[0].name; + *arg++ = temp[0].hex; + *arg++ = temp[0].mode; + *arg++ = temp[1].name; + *arg++ = temp[1].hex; + *arg++ = temp[1].mode; + if (other) { + *arg++ = other; + *arg++ = xfrm_msg; + } + } else { + *arg++ = pgm; + *arg++ = name; + } + *arg = NULL; + retval = spawn_prog(pgm, spawn_arg); + remove_tempfile(); + if (retval) { + fprintf(stderr, "external diff died, stopping at %s.\n", name); + exit(1); + } +} + +static void run_diff_cmd(const char *pgm, + const char *name, + const char *other, + struct diff_filespec *one, + struct diff_filespec *two, + const char *xfrm_msg, + int complete_rewrite) +{ + if (pgm) { + run_external_diff(pgm, name, other, one, two, xfrm_msg, + complete_rewrite); + return; + } + if (one && two) + builtin_diff(name, other ? other : name, + one, two, xfrm_msg, complete_rewrite); + else + printf("* Unmerged path %s\n", name); +} + +static void diff_fill_sha1_info(struct diff_filespec *one) +{ + if (DIFF_FILE_VALID(one)) { + if (!one->sha1_valid) { + struct stat st; + if (lstat(one->path, &st) < 0) + die("stat %s", one->path); + if (index_path(one->sha1, one->path, &st, 0)) + die("cannot hash %s\n", one->path); + } + } + else + memset(one->sha1, 0, 20); +} + +static void run_diff(struct diff_filepair *p, struct diff_options *o) +{ + const char *pgm = external_diff(); + char msg[PATH_MAX*2+300], *xfrm_msg; + struct diff_filespec *one; + struct diff_filespec *two; + const char *name; + const char *other; + char *name_munged, *other_munged; + int complete_rewrite = 0; + int len; + + if (DIFF_PAIR_UNMERGED(p)) { + /* unmerged */ + run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0); + return; + } + + name = p->one->path; + other = (strcmp(name, p->two->path) ? p->two->path : NULL); + name_munged = quote_one(name); + other_munged = quote_one(other); + one = p->one; two = p->two; + + diff_fill_sha1_info(one); + diff_fill_sha1_info(two); + + len = 0; + switch (p->status) { + case DIFF_STATUS_COPIED: + len += snprintf(msg + len, sizeof(msg) - len, + "similarity index %d%%\n" + "copy from %s\n" + "copy to %s\n", + (int)(0.5 + p->score * 100.0/MAX_SCORE), + name_munged, other_munged); + break; + case DIFF_STATUS_RENAMED: + len += snprintf(msg + len, sizeof(msg) - len, + "similarity index %d%%\n" + "rename from %s\n" + "rename to %s\n", + (int)(0.5 + p->score * 100.0/MAX_SCORE), + name_munged, other_munged); + break; + case DIFF_STATUS_MODIFIED: + if (p->score) { + len += snprintf(msg + len, sizeof(msg) - len, + "dissimilarity index %d%%\n", + (int)(0.5 + p->score * + 100.0/MAX_SCORE)); + complete_rewrite = 1; + break; + } + /* fallthru */ + default: + /* nothing */ + ; + } + + if (memcmp(one->sha1, two->sha1, 20)) { + char one_sha1[41]; + int abbrev = o->full_index ? 40 : DEFAULT_ABBREV; + memcpy(one_sha1, sha1_to_hex(one->sha1), 41); + + len += snprintf(msg + len, sizeof(msg) - len, + "index %.*s..%.*s", + abbrev, one_sha1, abbrev, + sha1_to_hex(two->sha1)); + if (one->mode == two->mode) + len += snprintf(msg + len, sizeof(msg) - len, + " %06o", one->mode); + len += snprintf(msg + len, sizeof(msg) - len, "\n"); + } + + if (len) + msg[--len] = 0; + xfrm_msg = len ? msg : NULL; + + if (!pgm && + DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) && + (S_IFMT & one->mode) != (S_IFMT & two->mode)) { + /* a filepair that changes between file and symlink + * needs to be split into deletion and creation. + */ + struct diff_filespec *null = alloc_filespec(two->path); + run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0); + free(null); + null = alloc_filespec(one->path); + run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0); + free(null); + } + else + run_diff_cmd(pgm, name, other, one, two, xfrm_msg, + complete_rewrite); + + free(name_munged); + free(other_munged); +} + +static void run_diffstat(struct diff_filepair *p, struct diff_options *o, + struct diffstat_t *diffstat) +{ + const char *name; + const char *other; + + if (DIFF_PAIR_UNMERGED(p)) { + /* unmerged */ + builtin_diffstat(p->one->path, NULL, NULL, NULL, diffstat); + return; + } + + name = p->one->path; + other = (strcmp(name, p->two->path) ? p->two->path : NULL); + + diff_fill_sha1_info(p->one); + diff_fill_sha1_info(p->two); + + builtin_diffstat(name, other, p->one, p->two, diffstat); +} + +void diff_setup(struct diff_options *options) +{ + memset(options, 0, sizeof(*options)); + options->output_format = DIFF_FORMAT_RAW; + options->line_termination = '\n'; + options->break_opt = -1; + options->rename_limit = -1; + + options->change = diff_change; + options->add_remove = diff_addremove; +} + +int diff_setup_done(struct diff_options *options) +{ + if ((options->find_copies_harder && + options->detect_rename != DIFF_DETECT_COPY) || + (0 <= options->rename_limit && !options->detect_rename)) + return -1; + + /* + * These cases always need recursive; we do not drop caller-supplied + * recursive bits for other formats here. + */ + if ((options->output_format == DIFF_FORMAT_PATCH) || + (options->output_format == DIFF_FORMAT_DIFFSTAT)) + options->recursive = 1; + + if (options->detect_rename && options->rename_limit < 0) + options->rename_limit = diff_rename_limit_default; + if (options->setup & DIFF_SETUP_USE_CACHE) { + if (!active_cache) + /* read-cache does not die even when it fails + * so it is safe for us to do this here. Also + * it does not smudge active_cache or active_nr + * when it fails, so we do not have to worry about + * cleaning it up ourselves either. + */ + read_cache(); + } + if (options->setup & DIFF_SETUP_USE_SIZE_CACHE) + use_size_cache = 1; + if (options->abbrev <= 0 || 40 < options->abbrev) + options->abbrev = 40; /* full */ + + return 0; +} + +int diff_opt_parse(struct diff_options *options, const char **av, int ac) +{ + const char *arg = av[0]; + if (!strcmp(arg, "-p") || !strcmp(arg, "-u")) + options->output_format = DIFF_FORMAT_PATCH; + else if (!strcmp(arg, "--patch-with-raw")) { + options->output_format = DIFF_FORMAT_PATCH; + options->with_raw = 1; + } + else if (!strcmp(arg, "--stat")) + options->output_format = DIFF_FORMAT_DIFFSTAT; + else if (!strcmp(arg, "--patch-with-stat")) { + options->output_format = DIFF_FORMAT_PATCH; + options->with_stat = 1; + } + else if (!strcmp(arg, "-z")) + options->line_termination = 0; + else if (!strncmp(arg, "-l", 2)) + options->rename_limit = strtoul(arg+2, NULL, 10); + else if (!strcmp(arg, "--full-index")) + options->full_index = 1; + else if (!strcmp(arg, "--name-only")) + options->output_format = DIFF_FORMAT_NAME; + else if (!strcmp(arg, "--name-status")) + options->output_format = DIFF_FORMAT_NAME_STATUS; + else if (!strcmp(arg, "-R")) + options->reverse_diff = 1; + else if (!strncmp(arg, "-S", 2)) + options->pickaxe = arg + 2; + else if (!strcmp(arg, "-s")) + options->output_format = DIFF_FORMAT_NO_OUTPUT; + else if (!strncmp(arg, "-O", 2)) + options->orderfile = arg + 2; + else if (!strncmp(arg, "--diff-filter=", 14)) + options->filter = arg + 14; + else if (!strcmp(arg, "--pickaxe-all")) + options->pickaxe_opts = DIFF_PICKAXE_ALL; + else if (!strcmp(arg, "--pickaxe-regex")) + options->pickaxe_opts = DIFF_PICKAXE_REGEX; + else if (!strncmp(arg, "-B", 2)) { + if ((options->break_opt = + diff_scoreopt_parse(arg)) == -1) + return -1; + } + else if (!strncmp(arg, "-M", 2)) { + if ((options->rename_score = + diff_scoreopt_parse(arg)) == -1) + return -1; + options->detect_rename = DIFF_DETECT_RENAME; + } + else if (!strncmp(arg, "-C", 2)) { + if ((options->rename_score = + diff_scoreopt_parse(arg)) == -1) + return -1; + options->detect_rename = DIFF_DETECT_COPY; + } + else if (!strcmp(arg, "--find-copies-harder")) + options->find_copies_harder = 1; + else if (!strcmp(arg, "--abbrev")) + options->abbrev = DEFAULT_ABBREV; + else if (!strncmp(arg, "--abbrev=", 9)) { + options->abbrev = strtoul(arg + 9, NULL, 10); + if (options->abbrev < MINIMUM_ABBREV) + options->abbrev = MINIMUM_ABBREV; + else if (40 < options->abbrev) + options->abbrev = 40; + } + else + return 0; + return 1; +} + +static int parse_num(const char **cp_p) +{ + unsigned long num, scale; + int ch, dot; + const char *cp = *cp_p; + + num = 0; + scale = 1; + dot = 0; + for(;;) { + ch = *cp; + if ( !dot && ch == '.' ) { + scale = 1; + dot = 1; + } else if ( ch == '%' ) { + scale = dot ? scale*100 : 100; + cp++; /* % is always at the end */ + break; + } else if ( ch >= '0' && ch <= '9' ) { + if ( scale < 100000 ) { + scale *= 10; + num = (num*10) + (ch-'0'); + } + } else { + break; + } + cp++; + } + *cp_p = cp; + + /* user says num divided by scale and we say internally that + * is MAX_SCORE * num / scale. + */ + return (num >= scale) ? MAX_SCORE : (MAX_SCORE * num / scale); +} + +int diff_scoreopt_parse(const char *opt) +{ + int opt1, opt2, cmd; + + if (*opt++ != '-') + return -1; + cmd = *opt++; + if (cmd != 'M' && cmd != 'C' && cmd != 'B') + return -1; /* that is not a -M, -C nor -B option */ + + opt1 = parse_num(&opt); + if (cmd != 'B') + opt2 = 0; + else { + if (*opt == 0) + opt2 = 0; + else if (*opt != '/') + return -1; /* we expect -B80/99 or -B80 */ + else { + opt++; + opt2 = parse_num(&opt); + } + } + if (*opt != 0) + return -1; + return opt1 | (opt2 << 16); +} + +struct diff_queue_struct diff_queued_diff; + +void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp) +{ + if (queue->alloc <= queue->nr) { + queue->alloc = alloc_nr(queue->alloc); + queue->queue = xrealloc(queue->queue, + sizeof(dp) * queue->alloc); + } + queue->queue[queue->nr++] = dp; +} + +struct diff_filepair *diff_queue(struct diff_queue_struct *queue, + struct diff_filespec *one, + struct diff_filespec *two) +{ + struct diff_filepair *dp = xmalloc(sizeof(*dp)); + dp->one = one; + dp->two = two; + dp->score = 0; + dp->status = 0; + dp->source_stays = 0; + dp->broken_pair = 0; + if (queue) + diff_q(queue, dp); + return dp; +} + +void diff_free_filepair(struct diff_filepair *p) +{ + diff_free_filespec_data(p->one); + diff_free_filespec_data(p->two); + free(p->one); + free(p->two); + free(p); +} + +/* This is different from find_unique_abbrev() in that + * it stuffs the result with dots for alignment. + */ +const char *diff_unique_abbrev(const unsigned char *sha1, int len) +{ + int abblen; + const char *abbrev; + if (len == 40) + return sha1_to_hex(sha1); + + abbrev = find_unique_abbrev(sha1, len); + if (!abbrev) + return sha1_to_hex(sha1); + abblen = strlen(abbrev); + if (abblen < 37) { + static char hex[41]; + if (len < abblen && abblen <= len + 2) + sprintf(hex, "%s%.*s", abbrev, len+3-abblen, ".."); + else + sprintf(hex, "%s...", abbrev); + return hex; + } + return sha1_to_hex(sha1); +} + +static void diff_flush_raw(struct diff_filepair *p, + int line_termination, + int inter_name_termination, + struct diff_options *options, + int output_format) +{ + int two_paths; + char status[10]; + int abbrev = options->abbrev; + const char *path_one, *path_two; + + path_one = p->one->path; + path_two = p->two->path; + if (line_termination) { + path_one = quote_one(path_one); + path_two = quote_one(path_two); + } + + if (p->score) + sprintf(status, "%c%03d", p->status, + (int)(0.5 + p->score * 100.0/MAX_SCORE)); + else { + status[0] = p->status; + status[1] = 0; + } + switch (p->status) { + case DIFF_STATUS_COPIED: + case DIFF_STATUS_RENAMED: + two_paths = 1; + break; + case DIFF_STATUS_ADDED: + case DIFF_STATUS_DELETED: + two_paths = 0; + break; + default: + two_paths = 0; + break; + } + if (output_format != DIFF_FORMAT_NAME_STATUS) { + printf(":%06o %06o %s ", + p->one->mode, p->two->mode, + diff_unique_abbrev(p->one->sha1, abbrev)); + printf("%s ", + diff_unique_abbrev(p->two->sha1, abbrev)); + } + printf("%s%c%s", status, inter_name_termination, path_one); + if (two_paths) + printf("%c%s", inter_name_termination, path_two); + putchar(line_termination); + if (path_one != p->one->path) + free((void*)path_one); + if (path_two != p->two->path) + free((void*)path_two); +} + +static void diff_flush_name(struct diff_filepair *p, + int inter_name_termination, + int line_termination) +{ + char *path = p->two->path; + + if (line_termination) + path = quote_one(p->two->path); + else + path = p->two->path; + printf("%s%c", path, line_termination); + if (p->two->path != path) + free(path); +} + +int diff_unmodified_pair(struct diff_filepair *p) +{ + /* This function is written stricter than necessary to support + * the currently implemented transformers, but the idea is to + * let transformers to produce diff_filepairs any way they want, + * and filter and clean them up here before producing the output. + */ + struct diff_filespec *one, *two; + + if (DIFF_PAIR_UNMERGED(p)) + return 0; /* unmerged is interesting */ + + one = p->one; + two = p->two; + + /* deletion, addition, mode or type change + * and rename are all interesting. + */ + if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) || + DIFF_PAIR_MODE_CHANGED(p) || + strcmp(one->path, two->path)) + return 0; + + /* both are valid and point at the same path. that is, we are + * dealing with a change. + */ + if (one->sha1_valid && two->sha1_valid && + !memcmp(one->sha1, two->sha1, sizeof(one->sha1))) + return 1; /* no change */ + if (!one->sha1_valid && !two->sha1_valid) + return 1; /* both look at the same file on the filesystem. */ + return 0; +} + +static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o) +{ + if (diff_unmodified_pair(p)) + return; + + if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || + (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) + return; /* no tree diffs in patch format */ + + run_diff(p, o); +} + +static void diff_flush_stat(struct diff_filepair *p, struct diff_options *o, + struct diffstat_t *diffstat) +{ + if (diff_unmodified_pair(p)) + return; + + if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || + (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) + return; /* no tree diffs in patch format */ + + run_diffstat(p, o, diffstat); +} + +int diff_queue_is_empty(void) +{ + struct diff_queue_struct *q = &diff_queued_diff; + int i; + for (i = 0; i < q->nr; i++) + if (!diff_unmodified_pair(q->queue[i])) + return 0; + return 1; +} + +#if DIFF_DEBUG +void diff_debug_filespec(struct diff_filespec *s, int x, const char *one) +{ + fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n", + x, one ? one : "", + s->path, + DIFF_FILE_VALID(s) ? "valid" : "invalid", + s->mode, + s->sha1_valid ? sha1_to_hex(s->sha1) : ""); + fprintf(stderr, "queue[%d] %s size %lu flags %d\n", + x, one ? one : "", + s->size, s->xfrm_flags); +} + +void diff_debug_filepair(const struct diff_filepair *p, int i) +{ + diff_debug_filespec(p->one, i, "one"); + diff_debug_filespec(p->two, i, "two"); + fprintf(stderr, "score %d, status %c stays %d broken %d\n", + p->score, p->status ? p->status : '?', + p->source_stays, p->broken_pair); +} + +void diff_debug_queue(const char *msg, struct diff_queue_struct *q) +{ + int i; + if (msg) + fprintf(stderr, "%s\n", msg); + fprintf(stderr, "q->nr = %d\n", q->nr); + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + diff_debug_filepair(p, i); + } +} +#endif + +static void diff_resolve_rename_copy(void) +{ + int i, j; + struct diff_filepair *p, *pp; + struct diff_queue_struct *q = &diff_queued_diff; + + diff_debug_queue("resolve-rename-copy", q); + + for (i = 0; i < q->nr; i++) { + p = q->queue[i]; + p->status = 0; /* undecided */ + if (DIFF_PAIR_UNMERGED(p)) + p->status = DIFF_STATUS_UNMERGED; + else if (!DIFF_FILE_VALID(p->one)) + p->status = DIFF_STATUS_ADDED; + else if (!DIFF_FILE_VALID(p->two)) + p->status = DIFF_STATUS_DELETED; + else if (DIFF_PAIR_TYPE_CHANGED(p)) + p->status = DIFF_STATUS_TYPE_CHANGED; + + /* from this point on, we are dealing with a pair + * whose both sides are valid and of the same type, i.e. + * either in-place edit or rename/copy edit. + */ + else if (DIFF_PAIR_RENAME(p)) { + if (p->source_stays) { + p->status = DIFF_STATUS_COPIED; + continue; + } + /* See if there is some other filepair that + * copies from the same source as us. If so + * we are a copy. Otherwise we are either a + * copy if the path stays, or a rename if it + * does not, but we already handled "stays" case. + */ + for (j = i + 1; j < q->nr; j++) { + pp = q->queue[j]; + if (strcmp(pp->one->path, p->one->path)) + continue; /* not us */ + if (!DIFF_PAIR_RENAME(pp)) + continue; /* not a rename/copy */ + /* pp is a rename/copy from the same source */ + p->status = DIFF_STATUS_COPIED; + break; + } + if (!p->status) + p->status = DIFF_STATUS_RENAMED; + } + else if (memcmp(p->one->sha1, p->two->sha1, 20) || + p->one->mode != p->two->mode) + p->status = DIFF_STATUS_MODIFIED; + else { + /* This is a "no-change" entry and should not + * happen anymore, but prepare for broken callers. + */ + error("feeding unmodified %s to diffcore", + p->one->path); + p->status = DIFF_STATUS_UNKNOWN; + } + } + diff_debug_queue("resolve-rename-copy done", q); +} + +static void flush_one_pair(struct diff_filepair *p, + int diff_output_format, + struct diff_options *options, + struct diffstat_t *diffstat) +{ + int inter_name_termination = '\t'; + int line_termination = options->line_termination; + if (!line_termination) + inter_name_termination = 0; + + switch (p->status) { + case DIFF_STATUS_UNKNOWN: + break; + case 0: + die("internal error in diff-resolve-rename-copy"); + break; + default: + switch (diff_output_format) { + case DIFF_FORMAT_DIFFSTAT: + diff_flush_stat(p, options, diffstat); + break; + case DIFF_FORMAT_PATCH: + diff_flush_patch(p, options); + break; + case DIFF_FORMAT_RAW: + case DIFF_FORMAT_NAME_STATUS: + diff_flush_raw(p, line_termination, + inter_name_termination, + options, diff_output_format); + break; + case DIFF_FORMAT_NAME: + diff_flush_name(p, + inter_name_termination, + line_termination); + break; + case DIFF_FORMAT_NO_OUTPUT: + break; + } + } +} + +void diff_flush(struct diff_options *options) +{ + struct diff_queue_struct *q = &diff_queued_diff; + int i; + int diff_output_format = options->output_format; + struct diffstat_t *diffstat = NULL; + + if (diff_output_format == DIFF_FORMAT_DIFFSTAT || options->with_stat) { + diffstat = xcalloc(sizeof (struct diffstat_t), 1); + diffstat->xm.consume = diffstat_consume; + } + + if (options->with_raw) { + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + flush_one_pair(p, DIFF_FORMAT_RAW, options, NULL); + } + putchar(options->line_termination); + } + if (options->with_stat) { + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + flush_one_pair(p, DIFF_FORMAT_DIFFSTAT, options, + diffstat); + } + show_stats(diffstat); + free(diffstat); + diffstat = NULL; + putchar(options->line_termination); + } + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + flush_one_pair(p, diff_output_format, options, diffstat); + diff_free_filepair(p); + } + + if (diffstat) { + show_stats(diffstat); + free(diffstat); + } + + free(q->queue); + q->queue = NULL; + q->nr = q->alloc = 0; +} + +static void diffcore_apply_filter(const char *filter) +{ + int i; + struct diff_queue_struct *q = &diff_queued_diff; + struct diff_queue_struct outq; + outq.queue = NULL; + outq.nr = outq.alloc = 0; + + if (!filter) + return; + + if (strchr(filter, DIFF_STATUS_FILTER_AON)) { + int found; + for (i = found = 0; !found && i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + if (((p->status == DIFF_STATUS_MODIFIED) && + ((p->score && + strchr(filter, DIFF_STATUS_FILTER_BROKEN)) || + (!p->score && + strchr(filter, DIFF_STATUS_MODIFIED)))) || + ((p->status != DIFF_STATUS_MODIFIED) && + strchr(filter, p->status))) + found++; + } + if (found) + return; + + /* otherwise we will clear the whole queue + * by copying the empty outq at the end of this + * function, but first clear the current entries + * in the queue. + */ + for (i = 0; i < q->nr; i++) + diff_free_filepair(q->queue[i]); + } + else { + /* Only the matching ones */ + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + + if (((p->status == DIFF_STATUS_MODIFIED) && + ((p->score && + strchr(filter, DIFF_STATUS_FILTER_BROKEN)) || + (!p->score && + strchr(filter, DIFF_STATUS_MODIFIED)))) || + ((p->status != DIFF_STATUS_MODIFIED) && + strchr(filter, p->status))) + diff_q(&outq, p); + else + diff_free_filepair(p); + } + } + free(q->queue); + *q = outq; +} + +void diffcore_std(struct diff_options *options) +{ + if (options->break_opt != -1) + diffcore_break(options->break_opt); + if (options->detect_rename) + diffcore_rename(options); + if (options->break_opt != -1) + diffcore_merge_broken(); + if (options->pickaxe) + diffcore_pickaxe(options->pickaxe, options->pickaxe_opts); + if (options->orderfile) + diffcore_order(options->orderfile); + diff_resolve_rename_copy(); + diffcore_apply_filter(options->filter); +} + + +void diffcore_std_no_resolve(struct diff_options *options) +{ + if (options->pickaxe) + diffcore_pickaxe(options->pickaxe, options->pickaxe_opts); + if (options->orderfile) + diffcore_order(options->orderfile); + diffcore_apply_filter(options->filter); +} + +void diff_addremove(struct diff_options *options, + int addremove, unsigned mode, + const unsigned char *sha1, + const char *base, const char *path) +{ + char concatpath[PATH_MAX]; + struct diff_filespec *one, *two; + + /* This may look odd, but it is a preparation for + * feeding "there are unchanged files which should + * not produce diffs, but when you are doing copy + * detection you would need them, so here they are" + * entries to the diff-core. They will be prefixed + * with something like '=' or '*' (I haven't decided + * which but should not make any difference). + * Feeding the same new and old to diff_change() + * also has the same effect. + * Before the final output happens, they are pruned after + * merged into rename/copy pairs as appropriate. + */ + if (options->reverse_diff) + addremove = (addremove == '+' ? '-' : + addremove == '-' ? '+' : addremove); + + if (!path) path = ""; + sprintf(concatpath, "%s%s", base, path); + one = alloc_filespec(concatpath); + two = alloc_filespec(concatpath); + + if (addremove != '+') + fill_filespec(one, sha1, mode); + if (addremove != '-') + fill_filespec(two, sha1, mode); + + diff_queue(&diff_queued_diff, one, two); +} + +void diff_change(struct diff_options *options, + unsigned old_mode, unsigned new_mode, + const unsigned char *old_sha1, + const unsigned char *new_sha1, + const char *base, const char *path) +{ + char concatpath[PATH_MAX]; + struct diff_filespec *one, *two; + + if (options->reverse_diff) { + unsigned tmp; + const unsigned char *tmp_c; + tmp = old_mode; old_mode = new_mode; new_mode = tmp; + tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c; + } + if (!path) path = ""; + sprintf(concatpath, "%s%s", base, path); + one = alloc_filespec(concatpath); + two = alloc_filespec(concatpath); + fill_filespec(one, old_sha1, old_mode); + fill_filespec(two, new_sha1, new_mode); + + diff_queue(&diff_queued_diff, one, two); +} + +void diff_unmerge(struct diff_options *options, + const char *path) +{ + struct diff_filespec *one, *two; + one = alloc_filespec(path); + two = alloc_filespec(path); + diff_queue(&diff_queued_diff, one, two); +} @@ -28,10 +28,11 @@ struct diff_options { with_raw:1, with_stat:1, tree_in_recursive:1, - full_index:1; + full_index:1, + silent_on_remove:1, + find_copies_harder:1; int break_opt; int detect_rename; - int find_copies_harder; int line_termination; int output_format; int pickaxe_opts; @@ -168,4 +169,8 @@ extern void diff_flush(struct diff_options*); extern const char *diff_unique_abbrev(const unsigned char *, int); +extern int run_diff_files(struct rev_info *revs, int silent_on_removed); + +extern int run_diff_index(struct rev_info *revs, int cached); + #endif /* DIFF_H */ diff --git a/dump-cache-tree.c b/dump-cache-tree.c new file mode 100644 index 0000000000..01e8bff0ee --- /dev/null +++ b/dump-cache-tree.c @@ -0,0 +1,30 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +static void dump_cache_tree(struct cache_tree *it, const char *pfx) +{ + int i; + if (!it) + return; + if (it->entry_count < 0) + printf("%-40s %s\n", "invalid", pfx); + else + printf("%s %s (%d entries)\n", + sha1_to_hex(it->sha1), + pfx, it->entry_count); + for (i = 0; i < it->subtree_nr; i++) { + char path[PATH_MAX]; + struct cache_tree_sub *down = it->down[i]; + sprintf(path, "%s%.*s/", pfx, down->namelen, down->name); + dump_cache_tree(down->cache_tree, path); + } +} + +int main(int ac, char **av) +{ + if (read_cache() < 0) + die("unable to read index file"); + dump_cache_tree(active_cache_tree, ""); + return 0; +} @@ -47,6 +47,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "log", cmd_log }, { "whatchanged", cmd_whatchanged }, { "show", cmd_show }, + { "fmt-patch", cmd_format_patch }, }; int i; diff --git a/gsimm.c b/gsimm.c new file mode 100644 index 0000000000..7024bf8f58 --- /dev/null +++ b/gsimm.c @@ -0,0 +1,94 @@ +#include "rabinpoly.h" +#include "gsimm.h" + +/* Has to be power of two. Since the Rabin hash only has 63 + usable bits, the number of hashes is limited to 32. + Lower powers of two could be used for speeding up processing + of very large files. */ +#define NUM_HASHES_PER_CHAR 32 + +/* Size of cache used to eliminate duplicate substrings. + Make small enough to comfortably fit in L1 cache. */ +#define DUP_CACHE_SIZE 256 + +/* For the final counting, do not count each bit individually, but + group them. Must be power of two, at most NUM_HASHES_PER_CHAR. + However, larger sizes result in higher cache usage. Use 8 bits + per group for efficient processing of large files on fast machines + with decent caches, or 4 bits for faster processing of small files + and for machines with small caches. */ +#define GROUP_BITS 4 +#define GROUP_COUNTERS (1<<GROUP_BITS) + +static void freq_to_md(u_char *md, int *freq) +{ int j, k; + + for (j = 0; j < MD_LENGTH; j++) + { u_char ch = 0; + + for (k = 0; k < 8; k++) ch = 2*ch + (freq[8*j+k] > 0); + md[j] = ch; + } + bzero (freq, sizeof(freq[0]) * MD_BITS); +} + +void gb_simm_process(u_char *data, unsigned len, u_char *md) +{ size_t j = 0; + u_int32_t ofs; + u_int32_t dup_cache[DUP_CACHE_SIZE]; + u_int32_t count [MD_BITS * (GROUP_COUNTERS/GROUP_BITS)]; + int freq[MD_BITS]; + + bzero (freq, sizeof(freq[0]) * MD_BITS); + bzero (dup_cache, DUP_CACHE_SIZE * sizeof (u_int32_t)); + bzero (count, (MD_BITS * (GROUP_COUNTERS/GROUP_BITS) * sizeof (u_int32_t))); + + /* Ignore incomplete substrings */ + while (j < len && j < RABIN_WINDOW_SIZE) rabin_slide8 (data[j++]); + + while (j < len) + { u_int64_t hash; + u_int32_t ofs, sum; + u_char idx; + int k; + + hash = rabin_slide8 (data[j++]); + + /* In order to update a much larger frequency table + with only 32 bits of checksum, randomly select a + part of the table to update. The selection should + only depend on the content of the represented data, + and be independent of the bits used for the update. + + Instead of updating 32 individual counters, process + the checksum in MD_BITS / GROUP_BITS groups of + GROUP_BITS bits, and count the frequency of each bit pattern. + */ + + idx = (hash >> 32); + sum = (u_int32_t) hash; + ofs = idx % (MD_BITS / NUM_HASHES_PER_CHAR) * NUM_HASHES_PER_CHAR; + idx %= DUP_CACHE_SIZE; + if (dup_cache[idx] != sum) + { dup_cache[idx] = sum; + for (k = 0; k < NUM_HASHES_PER_CHAR / GROUP_BITS; k++) + { count[ofs * GROUP_COUNTERS / GROUP_BITS + (sum % GROUP_COUNTERS)]++; + ofs += GROUP_BITS; + sum >>= GROUP_BITS; + } } } + + /* Distribute the occurrences of each bit group over the frequency table. */ + for (ofs = 0; ofs < MD_BITS; ofs += GROUP_BITS) + { int j; + for (j = 0; j < GROUP_COUNTERS; j++) + { int k; + for (k = 0; k < GROUP_BITS; k++) + { freq[ofs + k] += ((1<<k) & j) + ? count[ofs * GROUP_COUNTERS / GROUP_BITS + j] + : -count[ofs * GROUP_COUNTERS / GROUP_BITS + j]; + } } } + + if (md) + { rabin_reset(); + freq_to_md (md, freq); +} } diff --git a/gsimm.h b/gsimm.h new file mode 100644 index 0000000000..4b023b91a9 --- /dev/null +++ b/gsimm.h @@ -0,0 +1,28 @@ +#ifndef GSIMM_H +#define GSIMM_H + +/* Length of file message digest (MD) in bytes. Longer MD's are + better, but increase processing time for diminishing returns. + Must be multiple of NUM_HASHES_PER_CHAR / 8, and at least 24 + for good results +*/ +#define MD_LENGTH 32 +#define MD_BITS (MD_LENGTH * 8) + +/* The MIN_FILE_SIZE indicates the absolute minimal file size that + can be processed. As indicated above, the first and last + RABIN_WINDOW_SIZE - 1 bytes are skipped. + In order to get at least an average of 12 samples + per bit in the final message digest, require at least 3 * MD_LENGTH + complete windows in the file. */ +#define MIN_FILE_SIZE (3 * MD_LENGTH + 2 * (RABIN_WINDOW_SIZE - 1)) + +/* Limit matching algorithm to files less than 256 MB, so we can use + 32 bit integers everywhere without fear of overflow. For larger + files we should add logic to mmap the file by piece and accumulate + the frequency counts. */ +#define MAX_FILE_SIZE (256*1024*1024 - 1) + +void gb_simm_process(u_char *data, unsigned len, u_char *md); + +#endif diff --git a/log-tree.c b/log-tree.c index 9634c4677f..aaf2b9423f 100644 --- a/log-tree.c +++ b/log-tree.c @@ -37,12 +37,20 @@ void show_log(struct rev_info *opt, struct log_info *log, const char *sep) /* * Print header line of header.. */ - printf("%s%s", - opt->commit_format == CMIT_FMT_ONELINE ? "" : "commit ", - diff_unique_abbrev(commit->object.sha1, abbrev_commit)); - if (parent) - printf(" (from %s)", diff_unique_abbrev(parent->object.sha1, abbrev_commit)); - putchar(opt->commit_format == CMIT_FMT_ONELINE ? ' ' : '\n'); + + if (opt->commit_format == CMIT_FMT_EMAIL) + printf("From %s Thu Apr 7 15:13:13 2005\n", + sha1_to_hex(commit->object.sha1)); + else { + printf("%s%s", + opt->commit_format == CMIT_FMT_ONELINE ? "" : "commit ", + diff_unique_abbrev(commit->object.sha1, abbrev_commit)); + if (parent) + printf(" (from %s)", + diff_unique_abbrev(parent->object.sha1, + abbrev_commit)); + putchar(opt->commit_format == CMIT_FMT_ONELINE ? ' ' : '\n'); + } /* * And then the pretty-printed message itself @@ -152,15 +160,18 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log int log_tree_commit(struct rev_info *opt, struct commit *commit) { struct log_info log; + int shown; log.commit = commit; log.parent = NULL; opt->loginfo = &log; - if (!log_tree_diff(opt, commit, &log) && opt->loginfo && opt->always_show_header) { + shown = log_tree_diff(opt, commit, &log); + if (!shown && opt->loginfo && opt->always_show_header) { log.parent = NULL; show_log(opt, opt->loginfo, ""); + shown = 1; } opt->loginfo = NULL; - return 0; + return shown; } diff --git a/rabinpoly.c b/rabinpoly.c new file mode 100644 index 0000000000..c26f3829c3 --- /dev/null +++ b/rabinpoly.c @@ -0,0 +1,154 @@ +/* + * + * Copyright (C) 1999 David Mazieres (dm@uun.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + + /* Faster generic_fls */ + /* (c) 2002, D.Phillips and Sistina Software */ + +#include "rabinpoly.h" +#define MSB64 0x8000000000000000ULL + +static inline unsigned fls8(unsigned n) +{ + return n & 0xf0? + n & 0xc0? (n >> 7) + 7: (n >> 5) + 5: + n & 0x0c? (n >> 3) + 3: n - ((n + 1) >> 2); +} + +static inline unsigned fls16(unsigned n) +{ + return n & 0xff00? fls8(n >> 8) + 8: fls8(n); +} + +static inline unsigned fls32(unsigned n) +{ + return n & 0xffff0000? fls16(n >> 16) + 16: fls16(n); +} + +static inline unsigned fls64(unsigned long long n) /* should be u64 */ +{ + return n & 0xffffffff00000000ULL? fls32(n >> 32) + 32: fls32(n); +} + + +static u_int64_t polymod (u_int64_t nh, u_int64_t nl, u_int64_t d); +static void polymult (u_int64_t *php, u_int64_t *plp, + u_int64_t x, u_int64_t y); +static u_int64_t polymmult (u_int64_t x, u_int64_t y, u_int64_t d); + +static u_int64_t poly = 0xb15e234bd3792f63ull; // Actual polynomial +static u_int64_t T[256]; // Lookup table for mod +static int shift; + +u_int64_t append8 (u_int64_t p, u_char m) +{ return ((p << 8) | m) ^ T[p >> shift]; +} + +static u_int64_t +polymod (u_int64_t nh, u_int64_t nl, u_int64_t d) +{ int i, k; + assert (d); + k = fls64 (d) - 1; + d <<= 63 - k; + + if (nh) { + if (nh & MSB64) + nh ^= d; + for (i = 62; i >= 0; i--) + if (nh & 1ULL << i) { + nh ^= d >> (63 - i); + nl ^= d << (i + 1); + } + } + for (i = 63; i >= k; i--) + if (nl & 1ULL << i) + nl ^= d >> (63 - i); + return nl; +} + +static void +polymult (u_int64_t *php, u_int64_t *plp, u_int64_t x, u_int64_t y) +{ int i; + u_int64_t ph = 0, pl = 0; + if (x & 1) + pl = y; + for (i = 1; i < 64; i++) + if (x & (1ULL << i)) { + ph ^= y >> (64 - i); + pl ^= y << i; + } + if (php) + *php = ph; + if (plp) + *plp = pl; +} + +static u_int64_t +polymmult (u_int64_t x, u_int64_t y, u_int64_t d) +{ + u_int64_t h, l; + polymult (&h, &l, x, y); + return polymod (h, l, d); +} + +static int size = RABIN_WINDOW_SIZE; +static u_int64_t fingerprint = 0; +static int bufpos = -1; +static u_int64_t U[256]; +static u_char buf[RABIN_WINDOW_SIZE]; + +void rabin_init () +{ u_int64_t sizeshift = 1; + u_int64_t T1; + int xshift; + int i, j; + assert (poly >= 0x100); + xshift = fls64 (poly) - 1; + shift = xshift - 8; + T1 = polymod (0, 1ULL << xshift, poly); + for (j = 0; j < 256; j++) + T[j] = polymmult (j, T1, poly) | ((u_int64_t) j << xshift); + + for (i = 1; i < size; i++) + sizeshift = append8 (sizeshift, 0); + for (i = 0; i < 256; i++) + U[i] = polymmult (i, sizeshift, poly); + bzero (buf, sizeof (buf)); +} + +void +rabin_reset () +{ rabin_init(); + fingerprint = 0; + bzero (buf, sizeof (buf)); +} + +u_int64_t +rabin_slide8 (u_char m) +{ u_char om; + if (++bufpos >= size) bufpos = 0; + + om = buf[bufpos]; + buf[bufpos] = m; + fingerprint = append8 (fingerprint ^ U[om], m); + + return fingerprint; +} + diff --git a/rabinpoly.h b/rabinpoly.h new file mode 100644 index 0000000000..a19a097459 --- /dev/null +++ b/rabinpoly.h @@ -0,0 +1,31 @@ +/* + * + * Copyright (C) 2000 David Mazieres (dm@uun.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * Translated to C and simplified by Geert Bosch (bosch@gnat.com) + */ + +#include <assert.h> +#include <strings.h> +#include <sys/types.h> + +#ifndef RABIN_WINDOW_SIZE +#define RABIN_WINDOW_SIZE 48 +#endif +void rabin_reset(); +u_int64_t rabin_slide8(u_char c); diff --git a/read-cache.c b/read-cache.c index f97f92d90a..1f71d12578 100644 --- a/read-cache.c +++ b/read-cache.c @@ -4,11 +4,26 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "cache.h" +#include "cache-tree.h" + +/* Index extensions. + * + * The first letter should be 'A'..'Z' for extensions that are not + * necessary for a correct operation (i.e. optimization data). + * When new extensions are added that _needs_ to be understood in + * order to correctly interpret the index file, pick character that + * is outside the range, to cause the reader to abort. + */ + +#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) ) +#define CACHE_EXT_TREE 0x54524545 /* "TREE" */ struct cache_entry **active_cache = NULL; static time_t index_file_timestamp; unsigned int active_nr = 0, active_alloc = 0, active_cache_changed = 0; +struct cache_tree *active_cache_tree = NULL; + /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -513,6 +528,22 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) return 0; } +static int read_index_extension(const char *ext, void *data, unsigned long sz) +{ + switch (CACHE_EXT(ext)) { + case CACHE_EXT_TREE: + active_cache_tree = cache_tree_read(data, sz); + break; + default: + if (*ext < 'A' || 'Z' < *ext) + return error("index uses %.4s extension, which we do not understand", + ext); + fprintf(stderr, "ignoring %.4s extension\n", ext); + break; + } + return 0; +} + int read_cache(void) { int fd, i; @@ -561,6 +592,22 @@ int read_cache(void) active_cache[i] = ce; } index_file_timestamp = st.st_mtime; + while (offset <= size - 20 - 8) { + /* After an array of active_nr index entries, + * there can be arbitrary number of extended + * sections, each of which is prefixed with + * extension name (4-byte) and section length + * in 4-byte network byte order. + */ + unsigned long extsize; + memcpy(&extsize, map + offset + 4, 4); + extsize = ntohl(extsize); + if (read_index_extension(map + offset, + map + offset + 8, extsize) < 0) + goto unmap; + offset += 8; + offset += extsize; + } return active_nr; unmap: @@ -595,6 +642,17 @@ static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len) return 0; } +static int write_index_ext_header(SHA_CTX *context, int fd, + unsigned long ext, unsigned long sz) +{ + ext = htonl(ext); + sz = htonl(sz); + if ((ce_write(context, fd, &ext, 4) < 0) || + (ce_write(context, fd, &sz, 4) < 0)) + return -1; + return 0; +} + static int ce_flush(SHA_CTX *context, int fd) { unsigned int left = write_buffer_len; @@ -691,5 +749,19 @@ int write_cache(int newfd, struct cache_entry **cache, int entries) if (ce_write(&c, newfd, ce, ce_size(ce)) < 0) return -1; } + + /* Write extension data here */ + if (active_cache_tree) { + unsigned long sz; + void *data = cache_tree_write(active_cache_tree, &sz); + if (data && + !write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sz) && + !ce_write(&c, newfd, data, sz)) + ; + else { + free(data); + return -1; + } + } return ce_flush(&c, newfd); } diff --git a/read-tree.c b/read-tree.c index 26f4f7e323..1c65101291 100644 --- a/read-tree.c +++ b/read-tree.c @@ -9,6 +9,7 @@ #include "object.h" #include "tree.h" +#include "cache-tree.h" #include <sys/time.h> #include <signal.h> @@ -828,6 +829,7 @@ int main(int argc, char **argv) } unpack_trees(fn); + cache_tree_free(&active_cache_tree); if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file"); diff --git a/t/t1001-read-tree-m-2way.sh b/t/t1001-read-tree-m-2way.sh index d0ed24275e..75e4c9a886 100755 --- a/t/t1001-read-tree-m-2way.sh +++ b/t/t1001-read-tree-m-2way.sh @@ -37,7 +37,7 @@ compare_change () { } check_cache_at () { - clean_if_empty=`git-diff-files "$1"` + clean_if_empty=`git-diff-files -- "$1"` case "$clean_if_empty" in '') echo "$1: clean" ;; ?*) echo "$1: dirty" ;; diff --git a/t/t1002-read-tree-m-u-2way.sh b/t/t1002-read-tree-m-u-2way.sh index 861ef4c0c6..4d175d8ea1 100755 --- a/t/t1002-read-tree-m-u-2way.sh +++ b/t/t1002-read-tree-m-u-2way.sh @@ -20,7 +20,7 @@ compare_change () { } check_cache_at () { - clean_if_empty=`git-diff-files "$1"` + clean_if_empty=`git-diff-files -- "$1"` case "$clean_if_empty" in '') echo "$1: clean" ;; ?*) echo "$1: dirty" ;; diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh index 8db329d7ff..9e1544df9d 100755 --- a/t/t4010-diff-pathspec.sh +++ b/t/t4010-diff-pathspec.sh @@ -28,7 +28,7 @@ cat >expected <<\EOF EOF test_expect_success \ 'limit to path should show nothing' \ - 'git-diff-index --cached $tree path >current && + 'git-diff-index --cached $tree -- path >current && compare_diff_raw current expected' cat >expected <<\EOF @@ -36,7 +36,7 @@ cat >expected <<\EOF EOF test_expect_success \ 'limit to path1 should show path1/file1' \ - 'git-diff-index --cached $tree path1 >current && + 'git-diff-index --cached $tree -- path1 >current && compare_diff_raw current expected' cat >expected <<\EOF @@ -44,7 +44,7 @@ cat >expected <<\EOF EOF test_expect_success \ 'limit to path1/ should show path1/file1' \ - 'git-diff-index --cached $tree path1/ >current && + 'git-diff-index --cached $tree -- path1/ >current && compare_diff_raw current expected' cat >expected <<\EOF @@ -52,14 +52,14 @@ cat >expected <<\EOF EOF test_expect_success \ 'limit to file0 should show file0' \ - 'git-diff-index --cached $tree file0 >current && + 'git-diff-index --cached $tree -- file0 >current && compare_diff_raw current expected' cat >expected <<\EOF EOF test_expect_success \ 'limit to file0/ should emit nothing.' \ - 'git-diff-index --cached $tree file0/ >current && + 'git-diff-index --cached $tree -- file0/ >current && compare_diff_raw current expected' test_done diff --git a/test-gsimm.c b/test-gsimm.c new file mode 100644 index 0000000000..bd28b7da28 --- /dev/null +++ b/test-gsimm.c @@ -0,0 +1,209 @@ +#include <unistd.h> +#include <stdlib.h> +#include <fcntl.h> +#include <libgen.h> +#include <stdio.h> +#include <assert.h> +#include <math.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include "rabinpoly.h" +#include "gsimm.h" + +#define MIN(x,y) ((y)<(x) ? (y) : (x)) +#define MAX(x,y) ((y)>(x) ? (y) : (x)) + +/* The RABIN_WINDOW_SIZE is the size of fingerprint window used by + Rabin algorithm. This is not a modifiable parameter. + + The first RABIN_WINDOW_SIZE - 1 bytes are skipped, in order to ensure + fingerprints are good hashes. This does somewhat reduce the + influence of the first few bytes in the file (they're part of + fewer windows, like the last few bytes), but that actually isn't + so bad as files often start with fixed content that may bias comparisons. +*/ + +typedef struct fileinfo +{ char *name; + size_t length; + u_char md[MD_LENGTH]; + int match; +} File; + +int flag_verbose = 0; +int flag_debug = 0; +char *flag_relative = 0; + +char cmd[12] = " ..."; +char md_strbuf[MD_LENGTH * 2 + 1]; +u_char relative_md [MD_LENGTH]; + +File *file; +int file_count; +size_t file_bytes; + +char hex[17] = "0123456789abcdef"; + +void usage() +{ fprintf (stderr, "usage: %s [-dhvw] [-r fingerprint] file ...\n", cmd); + fprintf (stderr, " -d\tdebug output, repeate for more verbosity\n"); + fprintf (stderr, " -h\tshow this usage information\n"); + fprintf (stderr, " -r\tshow distance relative to fingerprint " + "(%u hex digits)\n", MD_LENGTH * 2); + fprintf (stderr, " -v\tverbose output, repeat for even more verbosity\n"); + fprintf (stderr, " -w\tenable warnings for suspect statistics\n"); + exit (1); +} + +int dist (u_char *l, u_char *r) +{ int j, k; + int d = 0; + + for (j = 0; j < MD_LENGTH; j++) + { u_char ch = l[j] ^ r[j]; + + for (k = 0; k < 8; k++) d += ((ch & (1<<k)) > 0); + } + + return d; +} + +char *md_to_str(u_char *md) +{ int j; + + for (j = 0; j < MD_LENGTH; j++) + { u_char ch = md[j]; + + md_strbuf[j*2] = hex[ch >> 4]; + md_strbuf[j*2+1] = hex[ch & 0xF]; + } + + md_strbuf[j*2] = 0; + return md_strbuf; +} + +void process_file (char *name) +{ int fd; + struct stat fs; + u_char *data; + File *fi = file+file_count;; + + fd = open (name, O_RDONLY, 0); + if (fd < 0) + { perror (name); + exit (2); + } + + if (fstat (fd, &fs)) + { perror (name); + exit (2); + } + + if (fs.st_size >= MIN_FILE_SIZE + && fs.st_size <= MAX_FILE_SIZE) + { fi->length = fs.st_size; + fi->name = name; + + data = (u_char *) mmap (0, fs.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (data == (u_char *) -1) + { perror (name); + exit (2); + } + + gb_simm_process (data, fs.st_size, fi->md); + if (flag_relative) + { int d = dist (fi->md, relative_md); + double sim = 1.0 - MIN (1.0, (double) (d) / (MD_LENGTH * 4 - 1)); + fprintf (stdout, "%s %llu %u %s %u %3.1f\n", + md_to_str (fi->md), (long long unsigned) 0, + (unsigned) fs.st_size, name, + d, 100.0 * sim); + } + else + { + fprintf (stdout, "%s %llu %u %s\n", + md_to_str (fi->md), (long long unsigned) 0, + (unsigned) fs.st_size, name); + } + munmap (data, fs.st_size); + file_bytes += fs.st_size; + file_count++; + } else if (flag_verbose) + { fprintf (stdout, "skipping %s (size %llu)\n", name, (long long unsigned) fs.st_size); } + + close (fd); +} + +u_char *str_to_md(char *str, u_char *md) +{ int j; + + if (!md || !str) return 0; + + bzero (md, MD_LENGTH); + + for (j = 0; j < MD_LENGTH * 2; j++) + { char ch = str[j]; + + if (ch >= '0' && ch <= '9') + { md [j/2] = (md [j/2] << 4) + (ch - '0'); + } + else + { ch |= 32; + + if (ch < 'a' || ch > 'f') break; + md [j/2] = (md[j/2] << 4) + (ch - 'a' + 10); + } } + + return (j != MD_LENGTH * 2 || str[j] != 0) ? 0 : md; +} + +int main (int argc, char *argv[]) +{ int ch, j; + + strncpy (cmd, basename (argv[0]), 8); + + while ((ch = getopt(argc, argv, "dhr:vw")) != -1) + { switch (ch) + { case 'd': flag_debug++; + break; + case 'r': if (!optarg) + { fprintf (stderr, "%s: missing argument for -r\n", cmd); + return 1; + } + if (str_to_md (optarg, relative_md)) flag_relative = optarg; + else + { fprintf (stderr, "%s: not a valid fingerprint\n", optarg); + return 1; + } + break; + case 'v': flag_verbose++; + break; + case 'w': break; + default : usage(); + return (ch != 'h'); + } } + + argc -= optind; + argv += optind; + + if (argc == 0) usage(); + + rabin_reset (); + if (flag_verbose && flag_relative) + { fprintf (stdout, "distances are relative to %s\n", flag_relative); + } + + file = (File *) calloc (argc, sizeof (File)); + + for (j = 0; j < argc; j++) process_file (argv[j]); + + if (flag_verbose) + { fprintf (stdout, "%li bytes in %i files\n", (long) file_bytes, file_count); + } + + return 0; +} diff --git a/update-index.c b/update-index.c index facec8d915..157488ff1a 100644 --- a/update-index.c +++ b/update-index.c @@ -7,6 +7,7 @@ #include "strbuf.h" #include "quote.h" #include "tree-walk.h" +#include "cache-tree.h" /* * Default to not allowing changes to the list of files. The @@ -71,6 +72,7 @@ static int mark_valid(const char *path) active_cache[pos]->ce_flags &= ~htons(CE_VALID); break; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; return 0; } @@ -84,6 +86,12 @@ static int add_file_to_cache(const char *path) struct stat st; status = lstat(path, &st); + + /* We probably want to do this in remove_file_from_cache() and + * add_cache_entry() instead... + */ + cache_tree_invalidate_path(active_cache_tree, path); + if (status < 0 || S_ISDIR(st.st_mode)) { /* When we used to have "path" and now we want to add * "path/file", we need a way to remove "path" before @@ -326,6 +334,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, return error("%s: cannot add to the index - missing --add option?", path); report("add '%s'", path); + cache_tree_invalidate_path(active_cache_tree, path); return 0; } @@ -350,6 +359,7 @@ static void chmod_path(int flip, const char *path) default: goto fail; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; report("chmod %cx '%s'", flip, path); return; @@ -371,6 +381,7 @@ static void update_one(const char *path, const char *prefix, int prefix_length) die("Unable to mark file %s", path); return; } + cache_tree_invalidate_path(active_cache_tree, path); if (force_remove) { if (remove_file_from_cache(p)) @@ -446,6 +457,7 @@ static void read_index_info(int line_termination) free(path_name); continue; } + cache_tree_invalidate_path(active_cache_tree, path_name); if (!mode) { /* mode == 0 means there is no such path -- remove */ @@ -550,6 +562,7 @@ static int unresolve_one(const char *path) goto free_return; } + cache_tree_invalidate_path(active_cache_tree, path); remove_file_from_cache(path); if (add_cache_entry(ce_2, ADD_CACHE_OK_TO_ADD)) { error("%s: cannot add our version to the index.", path); diff --git a/write-tree.c b/write-tree.c index dcad6e6670..a5069921a0 100644 --- a/write-tree.c +++ b/write-tree.c @@ -5,95 +5,21 @@ */ #include "cache.h" #include "tree.h" +#include "cache-tree.h" static int missing_ok = 0; -static int check_valid_sha1(unsigned char *sha1) -{ - int ret; - - /* If we were anal, we'd check that the sha1 of the contents actually matches */ - ret = has_sha1_file(sha1); - if (ret == 0) - perror(sha1_file_name(sha1)); - return ret ? 0 : -1; -} - -static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1) -{ - unsigned char subdir_sha1[20]; - unsigned long size, offset; - char *buffer; - int nr; - - /* Guess at some random initial size */ - size = 8192; - buffer = xmalloc(size); - offset = 0; - - nr = 0; - while (nr < maxentries) { - struct cache_entry *ce = cachep[nr]; - const char *pathname = ce->name, *filename, *dirname; - int pathlen = ce_namelen(ce), entrylen; - unsigned char *sha1; - unsigned int mode; - - /* Did we hit the end of the directory? Return how many we wrote */ - if (baselen >= pathlen || memcmp(base, pathname, baselen)) - break; - - sha1 = ce->sha1; - mode = ntohl(ce->ce_mode); - - /* Do we have _further_ subdirectories? */ - filename = pathname + baselen; - dirname = strchr(filename, '/'); - if (dirname) { - int subdir_written; - - subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1); - nr += subdir_written; - - /* Now we need to write out the directory entry into this tree.. */ - mode = S_IFDIR; - pathlen = dirname - pathname; - - /* ..but the directory entry doesn't count towards the total count */ - nr--; - sha1 = subdir_sha1; - } - - if (!missing_ok && check_valid_sha1(sha1) < 0) - exit(1); - - entrylen = pathlen - baselen; - if (offset + entrylen + 100 > size) { - size = alloc_nr(offset + entrylen + 100); - buffer = xrealloc(buffer, size); - } - offset += sprintf(buffer + offset, "%o %.*s", mode, entrylen, filename); - buffer[offset++] = 0; - memcpy(buffer + offset, sha1, 20); - offset += 20; - nr++; - } - - write_sha1_file(buffer, offset, tree_type, returnsha1); - free(buffer); - return nr; -} - static const char write_tree_usage[] = "git-write-tree [--missing-ok]"; +static struct cache_file cache_file; + int main(int argc, char **argv) { - int i, funny; - int entries; - unsigned char sha1[20]; - + int entries, was_valid, newfd; + setup_git_directory(); + newfd = hold_index_file_for_update(&cache_file, get_index_file()); entries = read_cache(); if (argc == 2) { if (!strcmp(argv[1], "--missing-ok")) @@ -108,51 +34,26 @@ int main(int argc, char **argv) if (entries < 0) die("git-write-tree: error reading cache"); - /* Verify that the tree is merged */ - funny = 0; - for (i = 0; i < entries; i++) { - struct cache_entry *ce = active_cache[i]; - if (ce_stage(ce)) { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "%s: unmerged (%s)\n", ce->name, sha1_to_hex(ce->sha1)); + if (!active_cache_tree) + active_cache_tree = cache_tree(); + + was_valid = cache_tree_fully_valid(active_cache_tree); + if (!was_valid) { + if (cache_tree_update(active_cache_tree, + active_cache, active_nr, + missing_ok) < 0) + die("git-write-tree: error building trees"); + if (0 <= newfd) { + if (!write_cache(newfd, active_cache, active_nr)) + commit_index_file(&cache_file); } - } - if (funny) - die("git-write-tree: not able to write tree"); - - /* Also verify that the cache does not have path and path/file - * at the same time. At this point we know the cache has only - * stage 0 entries. - */ - funny = 0; - for (i = 0; i < entries - 1; i++) { - /* path/file always comes after path because of the way - * the cache is sorted. Also path can appear only once, - * which means conflicting one would immediately follow. + /* Not being able to write is fine -- we are only interested + * in updating the cache-tree part, and if the next caller + * ends up using the old index with unupdated cache-tree part + * it misses the work we did here, but that is just a + * performance penalty and not a big deal. */ - const char *this_name = active_cache[i]->name; - const char *next_name = active_cache[i+1]->name; - int this_len = strlen(this_name); - if (this_len < strlen(next_name) && - strncmp(this_name, next_name, this_len) == 0 && - next_name[this_len] == '/') { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "You have both %s and %s\n", - this_name, next_name); - } } - if (funny) - die("git-write-tree: not able to write tree"); - - /* Ok, write it out */ - if (write_tree(active_cache, entries, "", 0, sha1) != entries) - die("git-write-tree: internal error"); - printf("%s\n", sha1_to_hex(sha1)); + printf("%s\n", sha1_to_hex(active_cache_tree->sha1)); return 0; } |