From 844c11ae259bd33b971b9ca389b3f9619427e9a8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:13:29 -0700 Subject: diff-lib: use ce_mode_from_stat() rather than messing with modes manually The diff helpers used to do the magic mode canonicalization and all the other special mode handling by hand ("trust executable bit" and "has symlink support" handling). That's bogus. Use "ce_mode_from_stat()" that does this all for us. This is also going to be required when we add support for links to other git repositories. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff-lib.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 5c5b05bfe3..c6d127346a 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -357,7 +357,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) continue; } else - dpath->mode = canon_mode(st.st_mode); + dpath->mode = ntohl(ce_mode_from_stat(ce, st.st_mode)); while (i < entries) { struct cache_entry *nce = active_cache[i]; @@ -374,8 +374,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) int mode = ntohl(nce->ce_mode); num_compare_stages++; hashcpy(dpath->parent[stage-2].sha1, nce->sha1); - dpath->parent[stage-2].mode = - canon_mode(mode); + dpath->parent[stage-2].mode = ntohl(ce_mode_from_stat(nce, mode)); dpath->parent[stage-2].status = DIFF_STATUS_MODIFIED; } @@ -424,15 +423,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) if (!changed && !revs->diffopt.find_copies_harder) continue; oldmode = ntohl(ce->ce_mode); - - newmode = canon_mode(st.st_mode); - if (!trust_executable_bit && - S_ISREG(newmode) && S_ISREG(oldmode) && - ((newmode ^ oldmode) == 0111)) - newmode = oldmode; - else if (!has_symlinks && - S_ISREG(newmode) && S_ISLNK(oldmode)) - newmode = oldmode; + newmode = ntohl(ce_mode_from_stat(ce, st.st_mode)); diff_change(&revs->diffopt, oldmode, newmode, ce->sha1, (changed ? null_sha1 : ce->sha1), ce->name, NULL); -- cgit v1.2.3 From 1ad029b6a1e2fb0254667a2bea8d1ee180cc6ac7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 13 Apr 2007 03:23:20 -0700 Subject: Do not default to --no-index when given two directories. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-diff -- a/ b/ always defaulted to --no-index, primarily because the function is_in_index() was implemented quite incorrectly. Noticed by Patrick Maaß and Simon Schubert independently, initial patch was provided by Patrick but I fixed it differently. Signed-off-by: Junio C Hamano --- diff-lib.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 5c5b05bfe3..7531e20c78 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -142,18 +142,34 @@ static int queue_diff(struct diff_options *o, } } +/* + * Does the path name a blob in the working tree, or a directory + * in the working tree? + */ static int is_in_index(const char *path) { - int len = strlen(path); - int pos = cache_name_pos(path, len); - char c; - - if (pos < 0) - return 0; - if (strncmp(active_cache[pos]->name, path, len)) - return 0; - c = active_cache[pos]->name[len]; - return c == '\0' || c == '/'; + int len, pos; + struct cache_entry *ce; + + len = strlen(path); + while (path[len-1] == '/') + len--; + if (!len) + return 1; /* "." */ + pos = cache_name_pos(path, len); + if (0 <= pos) + return 1; + pos = -1 - pos; + while (pos < active_nr) { + ce = active_cache[pos++]; + if (ce_namelen(ce) <= len || + strncmp(ce->name, path, len) || + (ce->name[len] > '/')) + break; /* path cannot be a prefix */ + if (ce->name[len] == '/') + return 1; + } + return 0; } static int handle_diff_files_args(struct rev_info *revs, -- cgit v1.2.3 From a6080a0a44d5ead84db3dabbbc80e82df838533d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 7 Jun 2007 00:04:01 -0700 Subject: War on whitespace This uses "git-apply --whitespace=strip" to fix whitespace errors that have crept in to our source files over time. There are a few files that need to have trailing whitespaces (most notably, test vectors). The results still passes the test, and build result in Documentation/ area is unchanged. Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 07f4e8106a..7fb19c7b87 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -664,7 +664,7 @@ int run_diff_index(struct rev_info *revs, int cached) const char *tree_name; int match_missing = 0; - /* + /* * Backward compatibility wart - "diff-index -m" does * not mean "do not ignore merges", but totally different. */ -- cgit v1.2.3 From 4d3f4b80e49275c7eaf6ba0dbddd6180957926b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 7 Jul 2007 20:19:08 +0200 Subject: diff-lib.c: don't strdup twice The static function read_directory in diff-lib.c is only ever called with struct path_list lists with .strdup_paths turned on, i.e. path_list_insert will strdup the paths for us (again). Let's take advantage of that and stop doing it twice. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 7fb19c7b87..92c0e39ad6 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -24,7 +24,7 @@ static int read_directory(const char *path, struct path_list *list) while ((e = readdir(dir))) if (strcmp(".", e->d_name) && strcmp("..", e->d_name)) - path_list_insert(xstrdup(e->d_name), list); + path_list_insert(e->d_name, list); closedir(dir); return 0; -- cgit v1.2.3 From 6d2d9e8666ef72c8878af4822e243ca33b6752a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Wed, 15 Aug 2007 00:41:00 +0200 Subject: diff: squelch empty diffs even more When we compare two non-tracked files, or explicitly specify --no-index, the suggestion to run git-status is not helpful. The patch adds a new diff_options bitfield member, no_index, that is used instead of the special value of -2 of the rev_info field max_count to indicate that the index is not to be used. This makes it possible to pass that flag down to diffcore_skip_stat_unmatch(), which only has one diff_options parameter. This could even become a cleanup if we removed all assignments of max_count to a value of -2 (viz. replacement of a magic value with a self-documenting field name) but I didn't dare to do that so late in the rc game.. The no_index bit, if set, then tells diffcore_skip_stat_unmatch() to not account for any skipped stat-mismatches, which avoids the suggestion to run git-status. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- diff-lib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 92c0e39ad6..f5568c3b36 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -189,6 +189,7 @@ static int handle_diff_files_args(struct rev_info *revs, !strcmp(argv[1], "--no-index")) { revs->max_count = -2; revs->diffopt.exit_with_status = 1; + revs->diffopt.no_index = 1; } else if (!strcmp(argv[1], "-q")) *silent = 1; @@ -204,8 +205,10 @@ static int handle_diff_files_args(struct rev_info *revs, */ read_cache(); if (!is_in_index(revs->diffopt.paths[0]) || - !is_in_index(revs->diffopt.paths[1])) + !is_in_index(revs->diffopt.paths[1])) { revs->max_count = -2; + revs->diffopt.no_index = 1; + } } /* @@ -293,6 +296,7 @@ int setup_diff_no_index(struct rev_info *revs, else revs->diffopt.paths = argv + argc - 2; revs->diffopt.nr_paths = 2; + revs->diffopt.no_index = 1; revs->max_count = -2; return 0; } @@ -304,7 +308,7 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) if (handle_diff_files_args(revs, argc, argv, &silent_on_removed)) return -1; - if (revs->max_count == -2) { + if (revs->diffopt.no_index) { if (revs->diffopt.nr_paths != 2) return error("need two files/directories with --no-index"); if (queue_diff(&revs->diffopt, revs->diffopt.paths[0], -- cgit v1.2.3 From b78281f7215c0236da6bd5c04ec5e500c83fd101 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 14 Sep 2007 12:12:32 -0700 Subject: diff --no-index: do not forget to run diff_setup_done() Code inspection by Linus found this. Signed-off-by: Junio C Hamano --- diff-lib.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index f5568c3b36..da5571302d 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -298,6 +298,8 @@ int setup_diff_no_index(struct rev_info *revs, revs->diffopt.nr_paths = 2; revs->diffopt.no_index = 1; revs->max_count = -2; + if (diff_setup_done(&revs->diffopt) < 0) + die("diff_setup_done failed"); return 0; } -- cgit v1.2.3 From 4bd5b7dacc404e6b733d9ab6744429c5027bb5e1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 10 Nov 2007 00:15:03 -0800 Subject: ce_match_stat, run_diff_files: use symbolic constants for readability ce_match_stat() can be told: (1) to ignore CE_VALID bit (used under "assume unchanged" mode) and perform the stat comparison anyway; (2) not to perform the contents comparison for racily clean entries and report mismatch of cached stat information; using its "option" parameter. Give them symbolic constants. Similarly, run_diff_files() can be told not to report anything on removed paths. Also give it a symbolic constant for that. Signed-off-by: Junio C Hamano --- diff-lib.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index da5571302d..9f8afbe71a 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -173,9 +173,10 @@ static int is_in_index(const char *path) } static int handle_diff_files_args(struct rev_info *revs, - int argc, const char **argv, int *silent) + int argc, const char **argv, + unsigned int *options) { - *silent = 0; + *options = 0; /* revs->max_count == -2 means --no-index */ while (1 < argc && argv[1][0] == '-') { @@ -192,7 +193,7 @@ static int handle_diff_files_args(struct rev_info *revs, revs->diffopt.no_index = 1; } else if (!strcmp(argv[1], "-q")) - *silent = 1; + *options |= DIFF_SILENT_ON_REMOVED; else return error("invalid option: %s", argv[1]); argv++; argc--; @@ -305,9 +306,9 @@ int setup_diff_no_index(struct rev_info *revs, int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) { - int silent_on_removed; + unsigned int options; - if (handle_diff_files_args(revs, argc, argv, &silent_on_removed)) + if (handle_diff_files_args(revs, argc, argv, &options)) return -1; if (revs->diffopt.no_index) { @@ -329,13 +330,14 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) perror("read_cache"); return -1; } - return run_diff_files(revs, silent_on_removed); + return run_diff_files(revs, options); } -int run_diff_files(struct rev_info *revs, int silent_on_removed) +int run_diff_files(struct rev_info *revs, unsigned int option) { int entries, i; int diff_unmerged_stage = revs->max_count; + int silent_on_removed = option & DIFF_SILENT_ON_REMOVED; if (diff_unmerged_stage < 0) diff_unmerged_stage = 2; -- cgit v1.2.3 From fb63d7f889cf5df417b731b07952689df7f745c8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 9 Nov 2007 18:22:52 -0800 Subject: git-add: make the entry stat-clean after re-adding the same contents Earlier in commit 0781b8a9b2fe760fc4ed519a3a26e4b9bd6ccffe (add_file_to_index: skip rehashing if the cached stat already matches), add_file_to_index() were taught not to re-add the path if it already matches the index. The change meant well, but was not executed quite right. It used ie_modified() to see if the file on the work tree is really different from the index, and skipped adding the contents if the function says "not modified". This was wrong. There are three possible comparison results between the index and the file in the work tree: - with lstat(2) we _know_ they are different. E.g. if the length or the owner in the cached stat information is different from the length we just obtained from lstat(2), we can tell the file is modified without looking at the actual contents. - with lstat(2) we _know_ they are the same. The same length, the same owner, the same everything (but this has a twist, as described below). - we cannot tell from lstat(2) information alone and need to go to the filesystem to actually compare. The last case arises from what we call 'racy git' situation, that can be caused with this sequence: $ echo hello >file $ git add file $ echo aeiou >file ;# the same length If the second "echo" is done within the same filesystem timestamp granularity as the first "echo", then the timestamp recorded by "git add" and the timestamp we get from lstat(2) will be the same, and we can mistakenly say the file is not modified. The path is called 'racily clean'. We need to reliably detect racily clean paths are in fact modified. To solve this problem, when we write out the index, we mark the index entry that has the same timestamp as the index file itself (that is the time from the point of view of the filesystem) to tell any later code that does the lstat(2) comparison not to trust the cached stat info, and ie_modified() then actually goes to the filesystem to compare the contents for such a path. That's all good, but it should not be used for this "git add" optimization, as the goal of "git add" is to actually update the path in the index and make it stat-clean. With the false optimization, we did _not_ cause any data loss (after all, what we failed to do was only to update the cached stat information), but it made the following sequence leave the file stat dirty: $ echo hello >file $ git add file $ echo hello >file ;# the same contents $ git add file The solution is not to use ie_modified() which goes to the filesystem to see if it is really clean, but instead use ie_match_stat() with "assume racily clean paths are dirty" option, to force re-adding of such a path. There was another problem with "git add -u". The codepath shares the same issue when adding the paths that are found to be modified, but in addition, it asked "git diff-files" machinery run_diff_files() function (which is "git diff-files") to list the paths that are modified. But "git diff-files" machinery uses the same ie_modified() call so that it does not report racily clean _and_ actually clean paths as modified, which is not what we want. The patch allows the callers of run_diff_files() to pass the same "assume racily clean paths are dirty" option, and makes "git-add -u" codepath to use that option, to discover and re-add racily clean _and_ actually clean paths. We could further optimize on top of this patch to differentiate the case where the path really needs re-adding (i.e. the content of the racily clean entry was indeed different) and the case where only the cached stat information needs to be refreshed (i.e. the racily clean entry was actually clean), but I do not think it is worth it. This patch applies to maint and all the way up. Signed-off-by: Junio C Hamano --- diff-lib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 9f8afbe71a..ec1b5e3d44 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -338,6 +338,8 @@ int run_diff_files(struct rev_info *revs, unsigned int option) int entries, i; int diff_unmerged_stage = revs->max_count; int silent_on_removed = option & DIFF_SILENT_ON_REMOVED; + unsigned ce_option = ((option & DIFF_RACY_IS_MODIFIED) + ? CE_MATCH_RACY_IS_DIRTY : 0); if (diff_unmerged_stage < 0) diff_unmerged_stage = 2; @@ -443,7 +445,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) ce->sha1, ce->name, NULL); continue; } - changed = ce_match_stat(ce, &st, 0); + changed = ce_match_stat(ce, &st, ce_option); if (!changed && !revs->diffopt.find_copies_harder) continue; oldmode = ntohl(ce->ce_mode); -- cgit v1.2.3 From 8f67f8aefb1b751073f8b36fae8be8f72eb93f4a Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Sat, 10 Nov 2007 20:05:14 +0100 Subject: Make the diff_options bitfields be an unsigned with explicit masks. reverse_diff was a bit-value in disguise, it's merged in the flags now. Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- diff-lib.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index da5571302d..290a170b05 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -121,7 +121,7 @@ static int queue_diff(struct diff_options *o, } else { struct diff_filespec *d1, *d2; - if (o->reverse_diff) { + if (DIFF_OPT_TST(o, REVERSE_DIFF)) { unsigned tmp; const char *tmp_c; tmp = mode1; mode1 = mode2; mode2 = tmp; @@ -188,8 +188,8 @@ static int handle_diff_files_args(struct rev_info *revs, else if (!strcmp(argv[1], "-n") || !strcmp(argv[1], "--no-index")) { revs->max_count = -2; - revs->diffopt.exit_with_status = 1; - revs->diffopt.no_index = 1; + DIFF_OPT_SET(&revs->diffopt, EXIT_WITH_STATUS); + DIFF_OPT_SET(&revs->diffopt, NO_INDEX); } else if (!strcmp(argv[1], "-q")) *silent = 1; @@ -207,7 +207,7 @@ static int handle_diff_files_args(struct rev_info *revs, if (!is_in_index(revs->diffopt.paths[0]) || !is_in_index(revs->diffopt.paths[1])) { revs->max_count = -2; - revs->diffopt.no_index = 1; + DIFF_OPT_SET(&revs->diffopt, NO_INDEX); } } @@ -258,7 +258,7 @@ int setup_diff_no_index(struct rev_info *revs, break; } else if (i < argc - 3 && !strcmp(argv[i], "--no-index")) { i = argc - 3; - revs->diffopt.exit_with_status = 1; + DIFF_OPT_SET(&revs->diffopt, EXIT_WITH_STATUS); break; } if (argc != i + 2 || (!is_outside_repo(argv[i + 1], nongit, prefix) && @@ -296,7 +296,7 @@ int setup_diff_no_index(struct rev_info *revs, else revs->diffopt.paths = argv + argc - 2; revs->diffopt.nr_paths = 2; - revs->diffopt.no_index = 1; + DIFF_OPT_SET(&revs->diffopt, NO_INDEX); revs->max_count = -2; if (diff_setup_done(&revs->diffopt) < 0) die("diff_setup_done failed"); @@ -310,7 +310,7 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) if (handle_diff_files_args(revs, argc, argv, &silent_on_removed)) return -1; - if (revs->diffopt.no_index) { + if (DIFF_OPT_TST(&revs->diffopt, NO_INDEX)) { if (revs->diffopt.nr_paths != 2) return error("need two files/directories with --no-index"); if (queue_diff(&revs->diffopt, revs->diffopt.paths[0], @@ -346,7 +346,8 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) struct cache_entry *ce = active_cache[i]; int changed; - if (revs->diffopt.quiet && revs->diffopt.has_changes) + if (DIFF_OPT_TST(&revs->diffopt, QUIET) && + DIFF_OPT_TST(&revs->diffopt, HAS_CHANGES)) break; if (!ce_path_match(ce, revs->prune_data)) @@ -442,7 +443,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed) continue; } changed = ce_match_stat(ce, &st, 0); - if (!changed && !revs->diffopt.find_copies_harder) + if (!changed && !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) continue; oldmode = ntohl(ce->ce_mode); newmode = ntohl(ce_mode_from_stat(ce, st.st_mode)); @@ -561,7 +562,7 @@ static int show_modified(struct rev_info *revs, oldmode = old->ce_mode; if (mode == oldmode && !hashcmp(sha1, old->sha1) && - !revs->diffopt.find_copies_harder) + !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) return 0; mode = ntohl(mode); @@ -581,7 +582,8 @@ static int diff_cache(struct rev_info *revs, struct cache_entry *ce = *ac; int same = (entries > 1) && ce_same_name(ce, ac[1]); - if (revs->diffopt.quiet && revs->diffopt.has_changes) + if (DIFF_OPT_TST(&revs->diffopt, QUIET) && + DIFF_OPT_TST(&revs->diffopt, HAS_CHANGES)) break; if (!ce_path_match(ce, pathspec)) -- cgit v1.2.3 From ecf4831d89ba6986249e1bf232b2e2f5f7536223 Mon Sep 17 00:00:00 2001 From: Steffen Prohaska Date: Sun, 25 Nov 2007 23:29:03 +0100 Subject: Use is_absolute_path() in diff-lib.c, lockfile.c, setup.c, trace.c Using the helper function to test for absolute paths makes porting easier. Signed-off-by: Steffen Prohaska Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index f8e936ae10..d85d8f34ba 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -231,7 +231,7 @@ static int handle_diff_files_args(struct rev_info *revs, static int is_outside_repo(const char *path, int nongit, const char *prefix) { int i; - if (nongit || !strcmp(path, "-") || path[0] == '/') + if (nongit || !strcmp(path, "-") || is_absolute_path(path)) return 1; if (prefixcmp(path, "../")) return 0; -- cgit v1.2.3 From 7a51ed66f653c248993b3c4a61932e47933d835e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Jan 2008 16:03:17 -0800 Subject: Make on-disk index representation separate from in-core one This converts the index explicitly on read and write to its on-disk format, allowing the in-core format to contain more flags, and be simpler. In particular, the in-core format is now host-endian (as opposed to the on-disk one that is network endian in order to be able to be shared across machines) and as a result we can dispense with all the htonl/ntohl on accesses to the cache_entry fields. This will make it easier to make use of various temporary flags that do not exist in the on-disk format. Signed-off-by: Linus Torvalds --- diff-lib.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index d85d8f34ba..4a05b02cd0 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -37,7 +37,7 @@ static int get_mode(const char *path, int *mode) if (!path || !strcmp(path, "/dev/null")) *mode = 0; else if (!strcmp(path, "-")) - *mode = ntohl(create_ce_mode(0666)); + *mode = create_ce_mode(0666); else if (stat(path, &st)) return error("Could not access '%s'", path); else @@ -384,7 +384,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; } else - dpath->mode = ntohl(ce_mode_from_stat(ce, st.st_mode)); + dpath->mode = ce_mode_from_stat(ce, st.st_mode); while (i < entries) { struct cache_entry *nce = active_cache[i]; @@ -398,10 +398,10 @@ int run_diff_files(struct rev_info *revs, unsigned int option) */ stage = ce_stage(nce); if (2 <= stage) { - int mode = ntohl(nce->ce_mode); + int mode = nce->ce_mode; num_compare_stages++; hashcpy(dpath->parent[stage-2].sha1, nce->sha1); - dpath->parent[stage-2].mode = ntohl(ce_mode_from_stat(nce, mode)); + dpath->parent[stage-2].mode = ce_mode_from_stat(nce, mode); dpath->parent[stage-2].status = DIFF_STATUS_MODIFIED; } @@ -442,15 +442,15 @@ int run_diff_files(struct rev_info *revs, unsigned int option) } if (silent_on_removed) continue; - diff_addremove(&revs->diffopt, '-', ntohl(ce->ce_mode), + diff_addremove(&revs->diffopt, '-', ce->ce_mode, ce->sha1, ce->name, NULL); continue; } changed = ce_match_stat(ce, &st, ce_option); if (!changed && !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) continue; - oldmode = ntohl(ce->ce_mode); - newmode = ntohl(ce_mode_from_stat(ce, st.st_mode)); + oldmode = ce->ce_mode; + newmode = ce_mode_from_stat(ce, st.st_mode); diff_change(&revs->diffopt, oldmode, newmode, ce->sha1, (changed ? null_sha1 : ce->sha1), ce->name, NULL); @@ -471,7 +471,7 @@ static void diff_index_show_file(struct rev_info *revs, struct cache_entry *ce, unsigned char *sha1, unsigned int mode) { - diff_addremove(&revs->diffopt, prefix[0], ntohl(mode), + diff_addremove(&revs->diffopt, prefix[0], mode, sha1, ce->name, NULL); } @@ -550,14 +550,14 @@ static int show_modified(struct rev_info *revs, p->len = pathlen; memcpy(p->path, new->name, pathlen); p->path[pathlen] = 0; - p->mode = ntohl(mode); + p->mode = mode; hashclr(p->sha1); memset(p->parent, 0, 2 * sizeof(struct combine_diff_parent)); p->parent[0].status = DIFF_STATUS_MODIFIED; - p->parent[0].mode = ntohl(new->ce_mode); + p->parent[0].mode = new->ce_mode; hashcpy(p->parent[0].sha1, new->sha1); p->parent[1].status = DIFF_STATUS_MODIFIED; - p->parent[1].mode = ntohl(old->ce_mode); + p->parent[1].mode = old->ce_mode; hashcpy(p->parent[1].sha1, old->sha1); show_combined_diff(p, 2, revs->dense_combined_merges, revs); free(p); @@ -569,9 +569,6 @@ static int show_modified(struct rev_info *revs, !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) return 0; - mode = ntohl(mode); - oldmode = ntohl(oldmode); - diff_change(&revs->diffopt, oldmode, mode, old->sha1, sha1, old->name, NULL); return 0; @@ -628,7 +625,7 @@ static int diff_cache(struct rev_info *revs, cached, match_missing)) break; diff_unmerge(&revs->diffopt, ce->name, - ntohl(ce->ce_mode), ce->sha1); + ce->ce_mode, ce->sha1); break; case 3: diff_unmerge(&revs->diffopt, ce->name, @@ -664,7 +661,7 @@ static void mark_merge_entries(void) struct cache_entry *ce = active_cache[i]; if (!ce_stage(ce)) continue; - ce->ce_flags |= htons(CE_STAGEMASK); + ce->ce_flags |= CE_STAGEMASK; } } @@ -722,8 +719,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) cache_tree_invalidate_path(active_cache_tree, ce->name); last = ce; - ce->ce_mode = 0; - ce->ce_flags &= ~htons(CE_STAGEMASK); + ce->ce_flags |= CE_REMOVE; } *dst++ = ce; } -- cgit v1.2.3 From d1f2d7e8ca65504722108e2db710788f66c34c6c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 19 Jan 2008 17:27:12 -0800 Subject: Make run_diff_index() use unpack_trees(), not read_tree() A plain "git commit" would still run lstat() a lot more than necessary, because wt_status_print() would cause the index to be repeatedly flushed and re-read by wt_read_cache(), and that would cause the CE_UPTODATE bit to be lost, resulting in the files in the index being lstat'ed three times each. The reason why wt-status.c ended up invalidating and re-reading the cache multiple times was that it uses "run_diff_index()", which in turn uses "read_tree()" to populate the index with *both* the old index and the tree we want to compare against. So this patch re-writes run_diff_index() to not use read_tree(), but instead use "unpack_trees()" to diff the index to a tree. That, in turn, means that we don't need to modify the index itself, which then means that we don't need to invalidate it and re-read it! This, together with the lstat() optimizations, means that "git commit" on the kernel tree really only needs to lstat() the index entries once. That noticeably cuts down on the cached timings. Best time before: [torvalds@woody linux]$ time git commit > /dev/null real 0m0.399s user 0m0.232s sys 0m0.164s Best time after: [torvalds@woody linux]$ time git commit > /dev/null real 0m0.254s user 0m0.140s sys 0m0.112s so it's a noticeable improvement in addition to being a nice conceptual cleanup (it's really not that pretty that "run_diff_index()" dirties the index!) Doing an "strace -c" on it also shows that as it cuts the number of lstat() calls by two thirds, it goes from being lstat()-limited to being limited by getdents() (which is the readdir system call): Before: % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 60.69 0.000704 0 69230 31 lstat 23.62 0.000274 0 5522 getdents 8.36 0.000097 0 5508 2638 open 2.59 0.000030 0 2869 close 2.50 0.000029 0 274 write 1.47 0.000017 0 2844 fstat After: % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 45.17 0.000276 0 5522 getdents 26.51 0.000162 0 23112 31 lstat 19.80 0.000121 0 5503 2638 open 4.91 0.000030 0 2864 close 1.48 0.000020 0 274 write 1.34 0.000018 0 2844 fstat ... It passes the test-suite for me, but this is another of one of those really core functions, and certainly pretty subtle, so.. NOTE! The Linux lstat() system call is really quite cheap when everything is cached, so the fact that this is quite noticeable on Linux is likely to mean that it is *much* more noticeable on other operating systems. I bet you'll see a much bigger performance improvement from this on Windows in particular. Signed-off-by: Linus Torvalds --- diff-lib.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 137 insertions(+), 14 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 4a05b02cd0..045f3cc58e 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -9,6 +9,7 @@ #include "revision.h" #include "cache-tree.h" #include "path-list.h" +#include "unpack-trees.h" /* * diff-files @@ -435,6 +436,8 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; } + if (ce_uptodate(ce)) + continue; if (lstat(ce->name, &st) < 0) { if (errno != ENOENT && errno != ENOTDIR) { perror(ce->name); @@ -665,20 +668,133 @@ static void mark_merge_entries(void) } } -int run_diff_index(struct rev_info *revs, int cached) +/* + * This gets a mix of an existing index and a tree, one pathname entry + * at a time. The index entry may be a single stage-0 one, but it could + * also be multiple unmerged entries (in which case idx_pos/idx_nr will + * give you the position and number of entries in the index). + */ +static void do_oneway_diff(struct unpack_trees_options *o, + struct cache_entry *idx, + struct cache_entry *tree, + int idx_pos, int idx_nr) { - int ret; - struct object *ent; - struct tree *tree; - const char *tree_name; - int match_missing = 0; + struct rev_info *revs = o->unpack_data; + int match_missing, cached; /* * Backward compatibility wart - "diff-index -m" does - * not mean "do not ignore merges", but totally different. + * not mean "do not ignore merges", but "match_missing". + * + * But with the revision flag parsing, that's found in + * "!revs->ignore_merges". + */ + cached = o->index_only; + match_missing = !revs->ignore_merges; + + if (cached && idx && ce_stage(idx)) { + if (tree) + diff_unmerge(&revs->diffopt, idx->name, idx->ce_mode, idx->sha1); + return; + } + + /* + * Something added to the tree? + */ + if (!tree) { + show_new_file(revs, idx, cached, match_missing); + return; + } + + /* + * Something removed from the tree? */ - if (!revs->ignore_merges) - match_missing = 1; + if (!idx) { + diff_index_show_file(revs, "-", tree, tree->sha1, tree->ce_mode); + return; + } + + /* Show difference between old and new */ + show_modified(revs, tree, idx, 1, cached, match_missing); +} + +/* + * Count how many index entries go with the first one + */ +static inline int count_skip(const struct cache_entry *src, int pos) +{ + int skip = 1; + + /* We can only have multiple entries if the first one is not stage-0 */ + if (ce_stage(src)) { + struct cache_entry **p = active_cache + pos; + int namelen = ce_namelen(src); + + for (;;) { + const struct cache_entry *ce; + pos++; + if (pos >= active_nr) + break; + ce = *++p; + if (ce_namelen(ce) != namelen) + break; + if (memcmp(ce->name, src->name, namelen)) + break; + skip++; + } + } + return skip; +} + +/* + * The unpack_trees() interface is designed for merging, so + * the different source entries are designed primarily for + * the source trees, with the old index being really mainly + * used for being replaced by the result. + * + * For diffing, the index is more important, and we only have a + * single tree. + * + * We're supposed to return how many index entries we want to skip. + * + * This wrapper makes it all more readable, and takes care of all + * the fairly complex unpack_trees() semantic requirements, including + * the skipping, the path matching, the type conflict cases etc. + */ +static int oneway_diff(struct cache_entry **src, + struct unpack_trees_options *o, + int index_pos) +{ + int skip = 0; + struct cache_entry *idx = src[0]; + struct cache_entry *tree = src[1]; + struct rev_info *revs = o->unpack_data; + + if (index_pos >= 0) + skip = count_skip(idx, index_pos); + + /* + * Unpack-trees generates a DF/conflict entry if + * there was a directory in the index and a tree + * in the tree. From a diff standpoint, that's a + * delete of the tree and a create of the file. + */ + if (tree == o->df_conflict_entry) + tree = NULL; + + if (ce_path_match(idx ? idx : tree, revs->prune_data)) + do_oneway_diff(o, idx, tree, index_pos, skip); + + return skip; +} + +int run_diff_index(struct rev_info *revs, int cached) +{ + struct object *ent; + struct tree *tree; + const char *tree_name; + struct unpack_trees_options opts; + struct tree_desc t; mark_merge_entries(); @@ -687,13 +803,20 @@ int run_diff_index(struct rev_info *revs, int cached) tree = parse_tree_indirect(ent->sha1); if (!tree) return error("bad tree object %s", tree_name); - if (read_tree(tree, 1, revs->prune_data)) - return error("unable to read tree object %s", tree_name); - ret = diff_cache(revs, active_cache, active_nr, revs->prune_data, - cached, match_missing); + + memset(&opts, 0, sizeof(opts)); + opts.head_idx = 1; + opts.index_only = cached; + opts.merge = 1; + opts.fn = oneway_diff; + opts.unpack_data = revs; + + init_tree_desc(&t, tree->buffer, tree->size); + unpack_trees(1, &t, &opts); + diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); - return ret; + return 0; } int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) -- cgit v1.2.3 From 204ce979a5ed6f182e56bea5282c7b4a2d91208a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 20 Jan 2008 15:19:56 +0000 Subject: Also use unpack_trees() in do_diff_cache() As in run_diff_index(), we call unpack_trees() with the oneway_diff() function in do_diff_cache() now. This makes the function diff_cache() obsolete. Signed-off-by: Johannes Schindelin Signed-off-by: Linus Torvalds --- diff-lib.c | 92 +++++++++----------------------------------------------------- 1 file changed, 13 insertions(+), 79 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 045f3cc58e..03eaa7cef3 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -577,81 +577,6 @@ static int show_modified(struct rev_info *revs, return 0; } -static int diff_cache(struct rev_info *revs, - struct cache_entry **ac, int entries, - const char **pathspec, - int cached, int match_missing) -{ - while (entries) { - struct cache_entry *ce = *ac; - int same = (entries > 1) && ce_same_name(ce, ac[1]); - - if (DIFF_OPT_TST(&revs->diffopt, QUIET) && - DIFF_OPT_TST(&revs->diffopt, HAS_CHANGES)) - break; - - if (!ce_path_match(ce, pathspec)) - goto skip_entry; - - switch (ce_stage(ce)) { - case 0: - /* No stage 1 entry? That means it's a new file */ - if (!same) { - show_new_file(revs, ce, cached, match_missing); - break; - } - /* Show difference between old and new */ - show_modified(revs, ac[1], ce, 1, - cached, match_missing); - break; - case 1: - /* No stage 3 (merge) entry? - * That means it's been deleted. - */ - if (!same) { - diff_index_show_file(revs, "-", ce, - ce->sha1, ce->ce_mode); - break; - } - /* We come here with ce pointing at stage 1 - * (original tree) and ac[1] pointing at stage - * 3 (unmerged). show-modified with - * report-missing set to false does not say the - * file is deleted but reports true if work - * tree does not have it, in which case we - * fall through to report the unmerged state. - * Otherwise, we show the differences between - * the original tree and the work tree. - */ - if (!cached && - !show_modified(revs, ce, ac[1], 0, - cached, match_missing)) - break; - diff_unmerge(&revs->diffopt, ce->name, - ce->ce_mode, ce->sha1); - break; - case 3: - diff_unmerge(&revs->diffopt, ce->name, - 0, null_sha1); - break; - - default: - die("impossible cache entry stage"); - } - -skip_entry: - /* - * Ignore all the different stages for this file, - * we've handled the relevant cases now. - */ - do { - ac++; - entries--; - } while (entries && ce_same_name(ce, ac[0])); - } - return 0; -} - /* * This turns all merge entries into "stage 3". That guarantees that * when we read in the new tree (into "stage 1"), we won't lose sight @@ -826,6 +751,8 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) int i; struct cache_entry **dst; struct cache_entry *last = NULL; + struct unpack_trees_options opts; + struct tree_desc t; /* * This is used by git-blame to run diff-cache internally; @@ -853,8 +780,15 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) tree = parse_tree_indirect(tree_sha1); if (!tree) die("bad tree object %s", sha1_to_hex(tree_sha1)); - if (read_tree(tree, 1, opt->paths)) - return error("unable to read tree %s", sha1_to_hex(tree_sha1)); - return diff_cache(&revs, active_cache, active_nr, revs.prune_data, - 1, 0); + + memset(&opts, 0, sizeof(opts)); + opts.head_idx = 1; + opts.index_only = 1; + opts.merge = 1; + opts.fn = oneway_diff; + opts.unpack_data = &revs; + + init_tree_desc(&t, tree->buffer, tree->size); + unpack_trees(1, &t, &opts); + return 0; } -- cgit v1.2.3 From 203a2fe117070964a5bf7cc940a742cad7a19fca Mon Sep 17 00:00:00 2001 From: Daniel Barkalow Date: Thu, 7 Feb 2008 11:39:48 -0500 Subject: Allow callers of unpack_trees() to handle failure Return an error from unpack_trees() instead of calling die(), and exit with an error in read-tree, builtin-commit, and diff-lib. merge-recursive already expected an error return from unpack_trees, so it doesn't need to be changed. The merge function can return negative to abort. This will be used in builtin-checkout -m. Signed-off-by: Daniel Barkalow --- diff-lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 03eaa7cef3..94b150e830 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -737,7 +737,8 @@ int run_diff_index(struct rev_info *revs, int cached) opts.unpack_data = revs; init_tree_desc(&t, tree->buffer, tree->size); - unpack_trees(1, &t, &opts); + if (unpack_trees(1, &t, &opts)) + exit(128); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); @@ -789,6 +790,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) opts.unpack_data = &revs; init_tree_desc(&t, tree->buffer, tree->size); - unpack_trees(1, &t, &opts); + if (unpack_trees(1, &t, &opts)) + exit(128); return 0; } -- cgit v1.2.3 From c8c16f2865f7c9c0d59b31ce66d50a4ecae72fd0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 2 Mar 2008 00:57:26 -0800 Subject: diff-lib.c: constness strengthening The internal implementation of diff-index codepath used to use non const pointer to pass sha1 around, but it did not have to. With this, we can also lose the private no_sha1[] array, as we can use the public null_sha1[] array that exists exactly for the same purpose. Signed-off-by: Junio C Hamano --- diff-lib.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 94b150e830..4581b594d0 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -472,22 +472,21 @@ int run_diff_files(struct rev_info *revs, unsigned int option) static void diff_index_show_file(struct rev_info *revs, const char *prefix, struct cache_entry *ce, - unsigned char *sha1, unsigned int mode) + const unsigned char *sha1, unsigned int mode) { diff_addremove(&revs->diffopt, prefix[0], mode, sha1, ce->name, NULL); } static int get_stat_data(struct cache_entry *ce, - unsigned char **sha1p, + const unsigned char **sha1p, unsigned int *modep, int cached, int match_missing) { - unsigned char *sha1 = ce->sha1; + const unsigned char *sha1 = ce->sha1; unsigned int mode = ce->ce_mode; if (!cached) { - static unsigned char no_sha1[20]; int changed; struct stat st; if (lstat(ce->name, &st) < 0) { @@ -501,7 +500,7 @@ static int get_stat_data(struct cache_entry *ce, changed = ce_match_stat(ce, &st, 0); if (changed) { mode = ce_mode_from_stat(ce, st.st_mode); - sha1 = no_sha1; + sha1 = null_sha1; } } @@ -514,7 +513,7 @@ static void show_new_file(struct rev_info *revs, struct cache_entry *new, int cached, int match_missing) { - unsigned char *sha1; + const unsigned char *sha1; unsigned int mode; /* New file in the index: it might actually be different in @@ -533,7 +532,7 @@ static int show_modified(struct rev_info *revs, int cached, int match_missing) { unsigned int mode, oldmode; - unsigned char *sha1; + const unsigned char *sha1; if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) { if (report_missing) -- cgit v1.2.3 From bc052d7f435f8f729127cc4790484865c1a974b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2008 12:26:14 -0800 Subject: Make 'unpack_trees()' take the index to work on as an argument This is just a very mechanical conversion, and makes everybody set it to '&the_index' before calling, but at least it makes it more explicit where we work with the index. The next stage would be to split that index usage up into a 'source' and a 'destination' index, so that we can unpack into a different index than we started out from. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff-lib.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 4581b594d0..e359058d0b 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -734,6 +734,7 @@ int run_diff_index(struct rev_info *revs, int cached) opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; + opts.index = &the_index; init_tree_desc(&t, tree->buffer, tree->size); if (unpack_trees(1, &t, &opts)) @@ -787,6 +788,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = &revs; + opts.index = &the_index; init_tree_desc(&t, tree->buffer, tree->size); if (unpack_trees(1, &t, &opts)) -- cgit v1.2.3 From 34110cd4e394e3f92c01a4709689b384c34645d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2008 18:12:28 -0800 Subject: Make 'unpack_trees()' have a separate source and destination index We will always unpack into our own internal index, but we will take the source from wherever specified, and we will optionally write the result to a specified index (optionally, because not everybody even _wants_ any result: the index diffing really wants to just walk the tree and index in parallel). This ends up removing a fair number more lines than it adds, for the simple reason that we can now skip all the crud that tried to be oh-so-careful about maintaining our position in the index as we were traversing and modifying it. Since we don't actually modify the source index any more, we can just update the 'o->pos' pointer without worrying about whether an index entry got removed or replaced or added to. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff-lib.c | 49 ++++++++----------------------------------------- 1 file changed, 8 insertions(+), 41 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index e359058d0b..9520773f3b 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -600,8 +600,7 @@ static void mark_merge_entries(void) */ static void do_oneway_diff(struct unpack_trees_options *o, struct cache_entry *idx, - struct cache_entry *tree, - int idx_pos, int idx_nr) + struct cache_entry *tree) { struct rev_info *revs = o->unpack_data; int match_missing, cached; @@ -642,34 +641,6 @@ static void do_oneway_diff(struct unpack_trees_options *o, show_modified(revs, tree, idx, 1, cached, match_missing); } -/* - * Count how many index entries go with the first one - */ -static inline int count_skip(const struct cache_entry *src, int pos) -{ - int skip = 1; - - /* We can only have multiple entries if the first one is not stage-0 */ - if (ce_stage(src)) { - struct cache_entry **p = active_cache + pos; - int namelen = ce_namelen(src); - - for (;;) { - const struct cache_entry *ce; - pos++; - if (pos >= active_nr) - break; - ce = *++p; - if (ce_namelen(ce) != namelen) - break; - if (memcmp(ce->name, src->name, namelen)) - break; - skip++; - } - } - return skip; -} - /* * The unpack_trees() interface is designed for merging, so * the different source entries are designed primarily for @@ -685,18 +656,12 @@ static inline int count_skip(const struct cache_entry *src, int pos) * the fairly complex unpack_trees() semantic requirements, including * the skipping, the path matching, the type conflict cases etc. */ -static int oneway_diff(struct cache_entry **src, - struct unpack_trees_options *o, - int index_pos) +static int oneway_diff(struct cache_entry **src, struct unpack_trees_options *o) { - int skip = 0; struct cache_entry *idx = src[0]; struct cache_entry *tree = src[1]; struct rev_info *revs = o->unpack_data; - if (index_pos >= 0) - skip = count_skip(idx, index_pos); - /* * Unpack-trees generates a DF/conflict entry if * there was a directory in the index and a tree @@ -707,9 +672,9 @@ static int oneway_diff(struct cache_entry **src, tree = NULL; if (ce_path_match(idx ? idx : tree, revs->prune_data)) - do_oneway_diff(o, idx, tree, index_pos, skip); + do_oneway_diff(o, idx, tree); - return skip; + return 0; } int run_diff_index(struct rev_info *revs, int cached) @@ -734,7 +699,8 @@ int run_diff_index(struct rev_info *revs, int cached) opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; - opts.index = &the_index; + opts.src_index = &the_index; + opts.dst_index = NULL; init_tree_desc(&t, tree->buffer, tree->size); if (unpack_trees(1, &t, &opts)) @@ -788,7 +754,8 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = &revs; - opts.index = &the_index; + opts.src_index = &the_index; + opts.dst_index = &the_index; init_tree_desc(&t, tree->buffer, tree->size); if (unpack_trees(1, &t, &opts)) -- cgit v1.2.3 From 20a16eb33eee99fd3eab00c72f012b98d4eeee76 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 10 Mar 2008 23:51:13 -0700 Subject: unpack_trees(): fix diff-index regression. When skip_unmerged option is not given, unpack_trees() should not just skip unmerged cache entries but keep them in the result for the caller to sort them out. For callers other than diff-index, the incoming index should never be unmerged, but diff-index is a special case caller. Signed-off-by: Junio C Hamano --- diff-lib.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 9520773f3b..52dbac34a4 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -641,6 +641,21 @@ static void do_oneway_diff(struct unpack_trees_options *o, show_modified(revs, tree, idx, 1, cached, match_missing); } +static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_options *o) +{ + int len = ce_namelen(ce); + const struct index_state *index = o->src_index; + + while (o->pos < index->cache_nr) { + struct cache_entry *next = index->cache[o->pos]; + if (len != ce_namelen(next)) + break; + if (memcmp(ce->name, next->name, len)) + break; + o->pos++; + } +} + /* * The unpack_trees() interface is designed for merging, so * the different source entries are designed primarily for @@ -662,6 +677,9 @@ static int oneway_diff(struct cache_entry **src, struct unpack_trees_options *o) struct cache_entry *tree = src[1]; struct rev_info *revs = o->unpack_data; + if (idx && ce_stage(idx)) + skip_same_name(idx, o); + /* * Unpack-trees generates a DF/conflict entry if * there was a directory in the index and a tree -- cgit v1.2.3 From 948dd346fd6748f8c2c0ae488759cbbd05a09320 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 30 Mar 2008 17:29:48 -0700 Subject: diff-index: careful when inspecting work tree items Earlier, if you changed a staged path into a directory in the work tree, we happily ran lstat(2) on it and found that it exists, and declared that the user changed it to a gitlink. This is wrong for two reasons: (1) It may be a directory, but it may not be a submodule, and in the latter case, the change we need to report is "the blob at the path has disappeared". We need to check with resolve_gitlink_ref() to be consistent with what "git add" and "git update-index --add" does. (2) lstat(2) may have succeeded only because a leading component of the path was turned into a symbolic link that points at something that exists in the work tree. In such a case, the path itself does not exist anymore, as far as the index is concerned. This fixes these breakages in diff-index that the previous patch has exposed. Signed-off-by: Junio C Hamano --- diff-lib.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 14 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 52dbac34a4..ad9ed13fc9 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -10,6 +10,7 @@ #include "cache-tree.h" #include "path-list.h" #include "unpack-trees.h" +#include "refs.h" /* * diff-files @@ -333,6 +334,26 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) } return run_diff_files(revs, options); } +/* + * See if work tree has an entity that can be staged. Return 0 if so, + * return 1 if not and return -1 if error. + */ +static int check_work_tree_entity(const struct cache_entry *ce, struct stat *st, char *symcache) +{ + if (lstat(ce->name, st) < 0) { + if (errno != ENOENT && errno != ENOTDIR) + return -1; + return 1; + } + if (has_symlink_leading_path(ce->name, symcache)) + return 1; + if (S_ISDIR(st->st_mode)) { + unsigned char sub[20]; + if (resolve_gitlink_ref(ce->name, "HEAD", sub)) + return 1; + } + return 0; +} int run_diff_files(struct rev_info *revs, unsigned int option) { @@ -468,6 +489,11 @@ int run_diff_files(struct rev_info *revs, unsigned int option) * diff-index */ +struct oneway_unpack_data { + struct rev_info *revs; + char symcache[PATH_MAX]; +}; + /* A file entry went away or appeared */ static void diff_index_show_file(struct rev_info *revs, const char *prefix, @@ -481,7 +507,8 @@ static void diff_index_show_file(struct rev_info *revs, static int get_stat_data(struct cache_entry *ce, const unsigned char **sha1p, unsigned int *modep, - int cached, int match_missing) + int cached, int match_missing, + struct oneway_unpack_data *cbdata) { const unsigned char *sha1 = ce->sha1; unsigned int mode = ce->ce_mode; @@ -489,8 +516,11 @@ static int get_stat_data(struct cache_entry *ce, if (!cached) { int changed; struct stat st; - if (lstat(ce->name, &st) < 0) { - if (errno == ENOENT && match_missing) { + changed = check_work_tree_entity(ce, &st, cbdata->symcache); + if (changed < 0) + return -1; + else if (changed) { + if (match_missing) { *sha1p = sha1; *modep = mode; return 0; @@ -509,23 +539,25 @@ static int get_stat_data(struct cache_entry *ce, return 0; } -static void show_new_file(struct rev_info *revs, +static void show_new_file(struct oneway_unpack_data *cbdata, struct cache_entry *new, int cached, int match_missing) { const unsigned char *sha1; unsigned int mode; + struct rev_info *revs = cbdata->revs; - /* New file in the index: it might actually be different in + /* + * New file in the index: it might actually be different in * the working copy. */ - if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) + if (get_stat_data(new, &sha1, &mode, cached, match_missing, cbdata) < 0) return; diff_index_show_file(revs, "+", new, sha1, mode); } -static int show_modified(struct rev_info *revs, +static int show_modified(struct oneway_unpack_data *cbdata, struct cache_entry *old, struct cache_entry *new, int report_missing, @@ -533,8 +565,9 @@ static int show_modified(struct rev_info *revs, { unsigned int mode, oldmode; const unsigned char *sha1; + struct rev_info *revs = cbdata->revs; - if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) { + if (get_stat_data(new, &sha1, &mode, cached, match_missing, cbdata) < 0) { if (report_missing) diff_index_show_file(revs, "-", old, old->sha1, old->ce_mode); @@ -602,7 +635,8 @@ static void do_oneway_diff(struct unpack_trees_options *o, struct cache_entry *idx, struct cache_entry *tree) { - struct rev_info *revs = o->unpack_data; + struct oneway_unpack_data *cbdata = o->unpack_data; + struct rev_info *revs = cbdata->revs; int match_missing, cached; /* @@ -625,7 +659,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, * Something added to the tree? */ if (!tree) { - show_new_file(revs, idx, cached, match_missing); + show_new_file(cbdata, idx, cached, match_missing); return; } @@ -638,7 +672,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, } /* Show difference between old and new */ - show_modified(revs, tree, idx, 1, cached, match_missing); + show_modified(cbdata, tree, idx, 1, cached, match_missing); } static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_options *o) @@ -675,7 +709,8 @@ static int oneway_diff(struct cache_entry **src, struct unpack_trees_options *o) { struct cache_entry *idx = src[0]; struct cache_entry *tree = src[1]; - struct rev_info *revs = o->unpack_data; + struct oneway_unpack_data *cbdata = o->unpack_data; + struct rev_info *revs = cbdata->revs; if (idx && ce_stage(idx)) skip_same_name(idx, o); @@ -702,6 +737,7 @@ int run_diff_index(struct rev_info *revs, int cached) const char *tree_name; struct unpack_trees_options opts; struct tree_desc t; + struct oneway_unpack_data unpack_cb; mark_merge_entries(); @@ -711,12 +747,14 @@ int run_diff_index(struct rev_info *revs, int cached) if (!tree) return error("bad tree object %s", tree_name); + unpack_cb.revs = revs; + unpack_cb.symcache[0] = '\0'; memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; opts.merge = 1; opts.fn = oneway_diff; - opts.unpack_data = revs; + opts.unpack_data = &unpack_cb; opts.src_index = &the_index; opts.dst_index = NULL; @@ -738,6 +776,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) struct cache_entry *last = NULL; struct unpack_trees_options opts; struct tree_desc t; + struct oneway_unpack_data unpack_cb; /* * This is used by git-blame to run diff-cache internally; @@ -766,12 +805,14 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) if (!tree) die("bad tree object %s", sha1_to_hex(tree_sha1)); + unpack_cb.revs = &revs; + unpack_cb.symcache[0] = '\0'; memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; opts.merge = 1; opts.fn = oneway_diff; - opts.unpack_data = &revs; + opts.unpack_data = &unpack_cb; opts.src_index = &the_index; opts.dst_index = &the_index; -- cgit v1.2.3 From f58dbf23c33e0e79622f4344b48ab5bc9bc360cc Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 30 Mar 2008 17:30:08 -0700 Subject: diff-files: careful when inspecting work tree items This fixes the same breakage in diff-files. Signed-off-by: Junio C Hamano --- diff-lib.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index ad9ed13fc9..069e4507ae 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -362,10 +362,12 @@ int run_diff_files(struct rev_info *revs, unsigned int option) int silent_on_removed = option & DIFF_SILENT_ON_REMOVED; unsigned ce_option = ((option & DIFF_RACY_IS_MODIFIED) ? CE_MATCH_RACY_IS_DIRTY : 0); + char symcache[PATH_MAX]; if (diff_unmerged_stage < 0) diff_unmerged_stage = 2; entries = active_nr; + symcache[0] = '\0'; for (i = 0; i < entries; i++) { struct stat st; unsigned int oldmode, newmode; @@ -397,16 +399,17 @@ int run_diff_files(struct rev_info *revs, unsigned int option) memset(&(dpath->parent[0]), 0, sizeof(struct combine_diff_parent)*5); - if (lstat(ce->name, &st) < 0) { - if (errno != ENOENT && errno != ENOTDIR) { + changed = check_work_tree_entity(ce, &st, symcache); + if (!changed) + dpath->mode = ce_mode_from_stat(ce, st.st_mode); + else { + if (changed < 0) { perror(ce->name); continue; } if (silent_on_removed) continue; } - else - dpath->mode = ce_mode_from_stat(ce, st.st_mode); while (i < entries) { struct cache_entry *nce = active_cache[i]; @@ -459,8 +462,10 @@ int run_diff_files(struct rev_info *revs, unsigned int option) if (ce_uptodate(ce)) continue; - if (lstat(ce->name, &st) < 0) { - if (errno != ENOENT && errno != ENOTDIR) { + + changed = check_work_tree_entity(ce, &st, symcache); + if (changed) { + if (changed < 0) { perror(ce->name); continue; } -- cgit v1.2.3 From 8fa29602d4cf8c9ec7d837f7308a9582a8372cb5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 30 Mar 2008 12:39:25 -0700 Subject: diff-files: mark an index entry we know is up-to-date as such This does not make any difference when running diff-files alone, but if you internally run run_diff_files() and then run other operations further on the index, we do not have to run lstat(2) again on entries we already have checked. Signed-off-by: Junio C Hamano --- diff-lib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 069e4507ae..6e7ab29e34 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -476,8 +476,11 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; } changed = ce_match_stat(ce, &st, ce_option); - if (!changed && !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) - continue; + if (!changed) { + ce_mark_uptodate(ce); + if (!DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) + continue; + } oldmode = ce->ce_mode; newmode = ce_mode_from_stat(ce, st.st_mode); diff_change(&revs->diffopt, oldmode, newmode, -- cgit v1.2.3 From 59b0c24daaac3c2203dd8ac2de4dfcad909481a5 Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Thu, 24 Apr 2008 20:06:36 +0200 Subject: git-svn: detect and fail gracefully when dcommitting to a void The command git svn clone (URL of an empty SVN repo here) works, creates an empty git repository. I can perform the initial commit there, but then, "git svn dcommit" says : Use of uninitialized value in concatenation (.) or string at .../git-svn line 414. Committing to ... Unable to determine upstream SVN information from HEAD history I guess a correct management of the initial commit in git-svn would be hard to implement, but at least, the error message can be improved. First step is something like the patch below, and better would be for "git svn clone" to warn that it won't be able to do much with the cloned repo. Acked-by: Eric Wong Signed-off-by: Junio C Hamano --- diff-lib.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 069e4507ae..cfd629da48 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -264,6 +264,9 @@ int setup_diff_no_index(struct rev_info *revs, DIFF_OPT_SET(&revs->diffopt, EXIT_WITH_STATUS); break; } + if (nongit && argc != i + 2) + die("git diff [--no-index] takes two paths"); + if (argc != i + 2 || (!is_outside_repo(argv[i + 1], nongit, prefix) && !is_outside_repo(argv[i], nongit, prefix))) return -1; -- cgit v1.2.3 From 1392a377219adfee7cc7532e3c60e51d79ab40b1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 3 May 2008 17:04:42 -0700 Subject: diff: a submodule not checked out is not modified 948dd34 (diff-index: careful when inspecting work tree items, 2008-03-30) made the work tree check careful not to be fooled by a new directory that exists at a place the index expects a blob. For such a change to be a typechange from blob to submodule, the new directory has to be a repository. However, if the index expects a submodule there, we should not insist the work tree entity to be a repository --- a simple directory that is not a full fledged repository (even an empty directory would do) should be considered an unmodified subproject, because that is how a superproject with a submodule is checked out sparsely by default. This makes the function check_work_tree_entity() even more careful not to report a submodule that is not checked out as removed. It fixes the recently added test in t4027. Signed-off-by: Junio C Hamano --- diff-lib.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index cfd629da48..e0ebcdcf54 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -337,9 +337,15 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) } return run_diff_files(revs, options); } + /* - * See if work tree has an entity that can be staged. Return 0 if so, - * return 1 if not and return -1 if error. + * Has the work tree entity been removed? + * + * Return 1 if it was removed from the work tree, 0 if an entity to be + * compared with the cache entry ce still exists (the latter includes + * the case where a directory that is not a submodule repository + * exists for ce that is a submodule -- it is a submodule that is not + * checked out). Return negative for an error. */ static int check_work_tree_entity(const struct cache_entry *ce, struct stat *st, char *symcache) { @@ -352,7 +358,20 @@ static int check_work_tree_entity(const struct cache_entry *ce, struct stat *st, return 1; if (S_ISDIR(st->st_mode)) { unsigned char sub[20]; - if (resolve_gitlink_ref(ce->name, "HEAD", sub)) + + /* + * If ce is already a gitlink, we can have a plain + * directory (i.e. the submodule is not checked out), + * or a checked out submodule. Either case this is not + * a case where something was removed from the work tree, + * so we will return 0. + * + * Otherwise, if the directory is not a submodule + * repository, that means ce which was a blob turned into + * a directory --- the blob was removed! + */ + if (!S_ISGITLINK(ce->ce_mode) && + resolve_gitlink_ref(ce->name, "HEAD", sub)) return 1; } return 0; -- cgit v1.2.3 From 451244d724f921eca9ffaf526d45c825f7c6f4eb Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 3 May 2008 17:23:46 -0700 Subject: diff-lib.c: rename check_work_tree_entity() The function is about checking for removed work tree item, so name it accordingly to avoid future confusion. Signed-off-by: Junio C Hamano --- diff-lib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index e0ebcdcf54..4a3e25536c 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -347,7 +347,7 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) * exists for ce that is a submodule -- it is a submodule that is not * checked out). Return negative for an error. */ -static int check_work_tree_entity(const struct cache_entry *ce, struct stat *st, char *symcache) +static int check_removed(const struct cache_entry *ce, struct stat *st, char *symcache) { if (lstat(ce->name, st) < 0) { if (errno != ENOENT && errno != ENOTDIR) @@ -421,7 +421,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) memset(&(dpath->parent[0]), 0, sizeof(struct combine_diff_parent)*5); - changed = check_work_tree_entity(ce, &st, symcache); + changed = check_removed(ce, &st, symcache); if (!changed) dpath->mode = ce_mode_from_stat(ce, st.st_mode); else { @@ -485,7 +485,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) if (ce_uptodate(ce)) continue; - changed = check_work_tree_entity(ce, &st, symcache); + changed = check_removed(ce, &st, symcache); if (changed) { if (changed < 0) { perror(ce->name); @@ -543,7 +543,7 @@ static int get_stat_data(struct cache_entry *ce, if (!cached) { int changed; struct stat st; - changed = check_work_tree_entity(ce, &st, cbdata->symcache); + changed = check_removed(ce, &st, cbdata->symcache); if (changed < 0) return -1; else if (changed) { -- cgit v1.2.3 From c40641b77b0274186fd1b327d5dc3246f814aaaf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 May 2008 09:21:07 -0700 Subject: Optimize symlink/directory detection This is the base for making symlink detection in the middle fo a pathname saner and (much) more efficient. Under various loads, we want to verify that the full path leading up to a filename is a real directory tree, and that when we successfully do an 'lstat()' on a filename, we don't get a false positive due to a symlink in the middle of the path that git should have seen as a symlink, not as a normal path component. The 'has_symlink_leading_path()' function already did this, and cached a single level of symlink information, but didn't cache the _lack_ of a symlink, so the normal behaviour was actually the wrong way around, and we ended up doing an 'lstat()' on each path component to check that it was a real directory. This caches the last detected full directory and symlink entries, and speeds up especially deep directory structures a lot by avoiding to lstat() all the directories leading up to each entry in the index. [ This can - and should - probably be extended upon so that we eventually never do a bare 'lstat()' on any path entries at *all* when checking the index, but always check the full path carefully. Right now we do not generally check the whole path for all our normal quick index revalidation. We should also make sure that we're careful about all the invalidation, ie when we remove a link and replace it by a directory we should invalidate the symlink cache if it matches (and vice versa for the directory cache). But regardless, the basic function needs to be sane to do that. The old 'has_symlink_leading_path()' was not capable enough - or indeed the code readable enough - to really do that sanely. So I'm pushing this as not just an optimization, but as a base for further work. ] Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff-lib.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index c5894c7c5a..fe2ccec7e6 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -347,14 +347,14 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) * exists for ce that is a submodule -- it is a submodule that is not * checked out). Return negative for an error. */ -static int check_removed(const struct cache_entry *ce, struct stat *st, char *symcache) +static int check_removed(const struct cache_entry *ce, struct stat *st) { if (lstat(ce->name, st) < 0) { if (errno != ENOENT && errno != ENOTDIR) return -1; return 1; } - if (has_symlink_leading_path(ce->name, symcache)) + if (has_symlink_leading_path(ce_namelen(ce), ce->name)) return 1; if (S_ISDIR(st->st_mode)) { unsigned char sub[20]; @@ -421,7 +421,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) memset(&(dpath->parent[0]), 0, sizeof(struct combine_diff_parent)*5); - changed = check_removed(ce, &st, symcache); + changed = check_removed(ce, &st); if (!changed) dpath->mode = ce_mode_from_stat(ce, st.st_mode); else { @@ -485,7 +485,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) if (ce_uptodate(ce)) continue; - changed = check_removed(ce, &st, symcache); + changed = check_removed(ce, &st); if (changed) { if (changed < 0) { perror(ce->name); @@ -546,7 +546,7 @@ static int get_stat_data(struct cache_entry *ce, if (!cached) { int changed; struct stat st; - changed = check_removed(ce, &st, cbdata->symcache); + changed = check_removed(ce, &st); if (changed < 0) return -1; else if (changed) { -- cgit v1.2.3 From 0569e9b8cea20d5eedfec66730a9711a0907ab0d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 23 May 2008 22:28:56 -0700 Subject: "git diff": do not ignore index without --no-index Even if "foo" and/or "bar" does not exist in index, "git diff foo bar" should not change behaviour drastically from "git diff foo bar baz" or "git diff foo". A feature that "sometimes works and is handy" is an unreliable cute hack. "git diff foo bar" outside a git repository continues to work as a more colourful alternative to "diff -u" as before. Signed-off-by: Junio C Hamano --- diff-lib.c | 323 ------------------------------------------------------------- 1 file changed, 323 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index fe2ccec7e6..b17722d66a 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -8,7 +8,6 @@ #include "diffcore.h" #include "revision.h" #include "cache-tree.h" -#include "path-list.h" #include "unpack-trees.h" #include "refs.h" @@ -16,328 +15,6 @@ * diff-files */ -static int read_directory(const char *path, struct path_list *list) -{ - DIR *dir; - struct dirent *e; - - if (!(dir = opendir(path))) - return error("Could not open directory %s", path); - - while ((e = readdir(dir))) - if (strcmp(".", e->d_name) && strcmp("..", e->d_name)) - path_list_insert(e->d_name, list); - - closedir(dir); - return 0; -} - -static int get_mode(const char *path, int *mode) -{ - struct stat st; - - if (!path || !strcmp(path, "/dev/null")) - *mode = 0; - else if (!strcmp(path, "-")) - *mode = create_ce_mode(0666); - else if (stat(path, &st)) - return error("Could not access '%s'", path); - else - *mode = st.st_mode; - return 0; -} - -static int queue_diff(struct diff_options *o, - const char *name1, const char *name2) -{ - int mode1 = 0, mode2 = 0; - - if (get_mode(name1, &mode1) || get_mode(name2, &mode2)) - return -1; - - if (mode1 && mode2 && S_ISDIR(mode1) != S_ISDIR(mode2)) - return error("file/directory conflict: %s, %s", name1, name2); - - if (S_ISDIR(mode1) || S_ISDIR(mode2)) { - char buffer1[PATH_MAX], buffer2[PATH_MAX]; - struct path_list p1 = {NULL, 0, 0, 1}, p2 = {NULL, 0, 0, 1}; - int len1 = 0, len2 = 0, i1, i2, ret = 0; - - if (name1 && read_directory(name1, &p1)) - return -1; - if (name2 && read_directory(name2, &p2)) { - path_list_clear(&p1, 0); - return -1; - } - - if (name1) { - len1 = strlen(name1); - if (len1 > 0 && name1[len1 - 1] == '/') - len1--; - memcpy(buffer1, name1, len1); - buffer1[len1++] = '/'; - } - - if (name2) { - len2 = strlen(name2); - if (len2 > 0 && name2[len2 - 1] == '/') - len2--; - memcpy(buffer2, name2, len2); - buffer2[len2++] = '/'; - } - - for (i1 = i2 = 0; !ret && (i1 < p1.nr || i2 < p2.nr); ) { - const char *n1, *n2; - int comp; - - if (i1 == p1.nr) - comp = 1; - else if (i2 == p2.nr) - comp = -1; - else - comp = strcmp(p1.items[i1].path, - p2.items[i2].path); - - if (comp > 0) - n1 = NULL; - else { - n1 = buffer1; - strncpy(buffer1 + len1, p1.items[i1++].path, - PATH_MAX - len1); - } - - if (comp < 0) - n2 = NULL; - else { - n2 = buffer2; - strncpy(buffer2 + len2, p2.items[i2++].path, - PATH_MAX - len2); - } - - ret = queue_diff(o, n1, n2); - } - path_list_clear(&p1, 0); - path_list_clear(&p2, 0); - - return ret; - } else { - struct diff_filespec *d1, *d2; - - if (DIFF_OPT_TST(o, REVERSE_DIFF)) { - unsigned tmp; - const char *tmp_c; - tmp = mode1; mode1 = mode2; mode2 = tmp; - tmp_c = name1; name1 = name2; name2 = tmp_c; - } - - if (!name1) - name1 = "/dev/null"; - if (!name2) - name2 = "/dev/null"; - d1 = alloc_filespec(name1); - d2 = alloc_filespec(name2); - fill_filespec(d1, null_sha1, mode1); - fill_filespec(d2, null_sha1, mode2); - - diff_queue(&diff_queued_diff, d1, d2); - return 0; - } -} - -/* - * Does the path name a blob in the working tree, or a directory - * in the working tree? - */ -static int is_in_index(const char *path) -{ - int len, pos; - struct cache_entry *ce; - - len = strlen(path); - while (path[len-1] == '/') - len--; - if (!len) - return 1; /* "." */ - pos = cache_name_pos(path, len); - if (0 <= pos) - return 1; - pos = -1 - pos; - while (pos < active_nr) { - ce = active_cache[pos++]; - if (ce_namelen(ce) <= len || - strncmp(ce->name, path, len) || - (ce->name[len] > '/')) - break; /* path cannot be a prefix */ - if (ce->name[len] == '/') - return 1; - } - return 0; -} - -static int handle_diff_files_args(struct rev_info *revs, - int argc, const char **argv, - unsigned int *options) -{ - *options = 0; - - /* revs->max_count == -2 means --no-index */ - while (1 < argc && argv[1][0] == '-') { - if (!strcmp(argv[1], "--base")) - revs->max_count = 1; - else if (!strcmp(argv[1], "--ours")) - revs->max_count = 2; - else if (!strcmp(argv[1], "--theirs")) - revs->max_count = 3; - else if (!strcmp(argv[1], "-n") || - !strcmp(argv[1], "--no-index")) { - revs->max_count = -2; - DIFF_OPT_SET(&revs->diffopt, EXIT_WITH_STATUS); - DIFF_OPT_SET(&revs->diffopt, NO_INDEX); - } - else if (!strcmp(argv[1], "-q")) - *options |= DIFF_SILENT_ON_REMOVED; - else - return error("invalid option: %s", argv[1]); - argv++; argc--; - } - - if (revs->max_count == -1 && revs->diffopt.nr_paths == 2) { - /* - * If two files are specified, and at least one is untracked, - * default to no-index. - */ - read_cache(); - if (!is_in_index(revs->diffopt.paths[0]) || - !is_in_index(revs->diffopt.paths[1])) { - revs->max_count = -2; - DIFF_OPT_SET(&revs->diffopt, NO_INDEX); - } - } - - /* - * Make sure there are NO revision (i.e. pending object) parameter, - * rev.max_count is reasonable (0 <= n <= 3), - * there is no other revision filtering parameters. - */ - if (revs->pending.nr || revs->max_count > 3 || - revs->min_age != -1 || revs->max_age != -1) - return error("no revision allowed with diff-files"); - - if (revs->max_count == -1 && - (revs->diffopt.output_format & DIFF_FORMAT_PATCH)) - revs->combine_merges = revs->dense_combined_merges = 1; - - return 0; -} - -static int is_outside_repo(const char *path, int nongit, const char *prefix) -{ - int i; - if (nongit || !strcmp(path, "-") || is_absolute_path(path)) - return 1; - if (prefixcmp(path, "../")) - return 0; - if (!prefix) - return 1; - for (i = strlen(prefix); !prefixcmp(path, "../"); ) { - while (i > 0 && prefix[i - 1] != '/') - i--; - if (--i < 0) - return 1; - path += 3; - } - return 0; -} - -int setup_diff_no_index(struct rev_info *revs, - int argc, const char ** argv, int nongit, const char *prefix) -{ - int i; - for (i = 1; i < argc; i++) - if (argv[i][0] != '-' || argv[i][1] == '\0') - break; - else if (!strcmp(argv[i], "--")) { - i++; - break; - } else if (i < argc - 3 && !strcmp(argv[i], "--no-index")) { - i = argc - 3; - DIFF_OPT_SET(&revs->diffopt, EXIT_WITH_STATUS); - break; - } - if (nongit && argc != i + 2) - die("git diff [--no-index] takes two paths"); - - if (argc != i + 2 || (!is_outside_repo(argv[i + 1], nongit, prefix) && - !is_outside_repo(argv[i], nongit, prefix))) - return -1; - - diff_setup(&revs->diffopt); - for (i = 1; i < argc - 2; ) - if (!strcmp(argv[i], "--no-index")) - i++; - else { - int j = diff_opt_parse(&revs->diffopt, - argv + i, argc - i); - if (!j) - die("invalid diff option/value: %s", argv[i]); - i += j; - } - - if (prefix) { - int len = strlen(prefix); - - revs->diffopt.paths = xcalloc(2, sizeof(char*)); - for (i = 0; i < 2; i++) { - const char *p = argv[argc - 2 + i]; - /* - * stdin should be spelled as '-'; if you have - * path that is '-', spell it as ./-. - */ - p = (strcmp(p, "-") - ? xstrdup(prefix_filename(prefix, len, p)) - : p); - revs->diffopt.paths[i] = p; - } - } - else - revs->diffopt.paths = argv + argc - 2; - revs->diffopt.nr_paths = 2; - DIFF_OPT_SET(&revs->diffopt, NO_INDEX); - revs->max_count = -2; - if (diff_setup_done(&revs->diffopt) < 0) - die("diff_setup_done failed"); - return 0; -} - -int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) -{ - unsigned int options; - - if (handle_diff_files_args(revs, argc, argv, &options)) - return -1; - - if (DIFF_OPT_TST(&revs->diffopt, NO_INDEX)) { - if (revs->diffopt.nr_paths != 2) - return error("need two files/directories with --no-index"); - if (queue_diff(&revs->diffopt, revs->diffopt.paths[0], - revs->diffopt.paths[1])) - return -1; - diffcore_std(&revs->diffopt); - diff_flush(&revs->diffopt); - /* - * The return code for --no-index imitates diff(1): - * 0 = no changes, 1 = changes, else error - */ - return revs->diffopt.found_changes; - } - - if (read_cache() < 0) { - perror("read_cache"); - return -1; - } - return run_diff_files(revs, options); -} - /* * Has the work tree entity been removed? * -- cgit v1.2.3 From fd55a19eb1d49ae54008d932a65f79cd6fda45c9 Mon Sep 17 00:00:00 2001 From: Dmitry Potapov Date: Wed, 16 Jul 2008 18:54:02 +0400 Subject: Fix buffer overflow in git diff If PATH_MAX on your system is smaller than a path stored, it may cause buffer overflow and stack corruption in diff_addremove() and diff_change() functions when running git-diff Signed-off-by: Dmitry Potapov Signed-off-by: Junio C Hamano --- diff-lib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index b17722d66a..e7eaff9a68 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -171,7 +171,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) if (silent_on_removed) continue; diff_addremove(&revs->diffopt, '-', ce->ce_mode, - ce->sha1, ce->name, NULL); + ce->sha1, ce->name); continue; } changed = ce_match_stat(ce, &st, ce_option); @@ -184,7 +184,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) newmode = ce_mode_from_stat(ce, st.st_mode); diff_change(&revs->diffopt, oldmode, newmode, ce->sha1, (changed ? null_sha1 : ce->sha1), - ce->name, NULL); + ce->name); } diffcore_std(&revs->diffopt); @@ -208,7 +208,7 @@ static void diff_index_show_file(struct rev_info *revs, const unsigned char *sha1, unsigned int mode) { diff_addremove(&revs->diffopt, prefix[0], mode, - sha1, ce->name, NULL); + sha1, ce->name); } static int get_stat_data(struct cache_entry *ce, @@ -312,7 +312,7 @@ static int show_modified(struct oneway_unpack_data *cbdata, return 0; diff_change(&revs->diffopt, oldmode, mode, - old->sha1, sha1, old->name, NULL); + old->sha1, sha1, old->name); return 0; } -- cgit v1.2.3 From a5a818ee4877e4458e8e6741a03ac3b19941d58a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 18 Aug 2008 20:08:09 -0700 Subject: diff: vary default prefix depending on what are compared With a new configuration "diff.mnemonicprefix", "git diff" shows the differences between various combinations of preimage and postimage trees with prefixes different from the standard "a/" and "b/". Hopefully this will make the distinction stand out for some people. "git diff" compares the (i)ndex and the (w)ork tree; "git diff HEAD" compares a (c)ommit and the (w)ork tree; "git diff --cached" compares a (c)ommit and the (i)ndex; "git-diff HEAD:file1 file2" compares an (o)bject and a (w)ork tree entity; "git diff --no-index a b" compares two non-git things (1) and (2). Because these mnemonics now have meanings, they are swapped when reverse diff is in effect and this feature is enabled. Signed-off-by: Junio C Hamano --- diff-lib.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index e7eaff9a68..ae96c64ca2 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -63,6 +63,8 @@ int run_diff_files(struct rev_info *revs, unsigned int option) ? CE_MATCH_RACY_IS_DIRTY : 0); char symcache[PATH_MAX]; + diff_set_mnemonic_prefix(&revs->diffopt, "i/", "w/"); + if (diff_unmerged_stage < 0) diff_unmerged_stage = 2; entries = active_nr; @@ -469,6 +471,7 @@ int run_diff_index(struct rev_info *revs, int cached) if (unpack_trees(1, &t, &opts)) exit(128); + diff_set_mnemonic_prefix(&revs->diffopt, "c/", cached ? "i/" : "w/"); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; -- cgit v1.2.3 From ff7e6aad6d995a52e88cfeaae10cf8a768ff2358 Mon Sep 17 00:00:00 2001 From: Kjetil Barvik Date: Sun, 11 Jan 2009 19:36:42 +0100 Subject: Cleanup of unused symcache variable inside diff-lib.c Commit c40641b77b0274186fd1b327d5dc3246f814aaaf, 'Optimize symlink/directory detection' by Linus Torvalds, removed the 'char *symcache' parameter to the has_symlink_leading_path() function. This made all variables currently named 'symcache' inside diff-lib.c unnecessary. This also let us throw away the 'struct oneway_unpack_data', and instead directly use the 'struct rev_info *revs' member, which was the only member left after removal of the 'symcache[] array' member. The 'struct oneway_unpack_data' was introduced by the following commit: 948dd346 "diff-files: careful when inspecting work tree items" Impact: cleanup PATH_MAX bytes less memory stack usage in some cases Signed-off-by: Kjetil Barvik Signed-off-by: Junio C Hamano --- diff-lib.c | 40 +++++++++++----------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index ae96c64ca2..a41e1ec07c 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -61,14 +61,12 @@ int run_diff_files(struct rev_info *revs, unsigned int option) int silent_on_removed = option & DIFF_SILENT_ON_REMOVED; unsigned ce_option = ((option & DIFF_RACY_IS_MODIFIED) ? CE_MATCH_RACY_IS_DIRTY : 0); - char symcache[PATH_MAX]; diff_set_mnemonic_prefix(&revs->diffopt, "i/", "w/"); if (diff_unmerged_stage < 0) diff_unmerged_stage = 2; entries = active_nr; - symcache[0] = '\0'; for (i = 0; i < entries; i++) { struct stat st; unsigned int oldmode, newmode; @@ -198,11 +196,6 @@ int run_diff_files(struct rev_info *revs, unsigned int option) * diff-index */ -struct oneway_unpack_data { - struct rev_info *revs; - char symcache[PATH_MAX]; -}; - /* A file entry went away or appeared */ static void diff_index_show_file(struct rev_info *revs, const char *prefix, @@ -216,8 +209,7 @@ static void diff_index_show_file(struct rev_info *revs, static int get_stat_data(struct cache_entry *ce, const unsigned char **sha1p, unsigned int *modep, - int cached, int match_missing, - struct oneway_unpack_data *cbdata) + int cached, int match_missing) { const unsigned char *sha1 = ce->sha1; unsigned int mode = ce->ce_mode; @@ -248,25 +240,24 @@ static int get_stat_data(struct cache_entry *ce, return 0; } -static void show_new_file(struct oneway_unpack_data *cbdata, +static void show_new_file(struct rev_info *revs, struct cache_entry *new, int cached, int match_missing) { const unsigned char *sha1; unsigned int mode; - struct rev_info *revs = cbdata->revs; /* * New file in the index: it might actually be different in * the working copy. */ - if (get_stat_data(new, &sha1, &mode, cached, match_missing, cbdata) < 0) + if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) return; diff_index_show_file(revs, "+", new, sha1, mode); } -static int show_modified(struct oneway_unpack_data *cbdata, +static int show_modified(struct rev_info *revs, struct cache_entry *old, struct cache_entry *new, int report_missing, @@ -274,9 +265,8 @@ static int show_modified(struct oneway_unpack_data *cbdata, { unsigned int mode, oldmode; const unsigned char *sha1; - struct rev_info *revs = cbdata->revs; - if (get_stat_data(new, &sha1, &mode, cached, match_missing, cbdata) < 0) { + if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) { if (report_missing) diff_index_show_file(revs, "-", old, old->sha1, old->ce_mode); @@ -344,8 +334,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, struct cache_entry *idx, struct cache_entry *tree) { - struct oneway_unpack_data *cbdata = o->unpack_data; - struct rev_info *revs = cbdata->revs; + struct rev_info *revs = o->unpack_data; int match_missing, cached; /* @@ -368,7 +357,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, * Something added to the tree? */ if (!tree) { - show_new_file(cbdata, idx, cached, match_missing); + show_new_file(revs, idx, cached, match_missing); return; } @@ -381,7 +370,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, } /* Show difference between old and new */ - show_modified(cbdata, tree, idx, 1, cached, match_missing); + show_modified(revs, tree, idx, 1, cached, match_missing); } static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_options *o) @@ -418,8 +407,7 @@ static int oneway_diff(struct cache_entry **src, struct unpack_trees_options *o) { struct cache_entry *idx = src[0]; struct cache_entry *tree = src[1]; - struct oneway_unpack_data *cbdata = o->unpack_data; - struct rev_info *revs = cbdata->revs; + struct rev_info *revs = o->unpack_data; if (idx && ce_stage(idx)) skip_same_name(idx, o); @@ -446,7 +434,6 @@ int run_diff_index(struct rev_info *revs, int cached) const char *tree_name; struct unpack_trees_options opts; struct tree_desc t; - struct oneway_unpack_data unpack_cb; mark_merge_entries(); @@ -456,14 +443,12 @@ int run_diff_index(struct rev_info *revs, int cached) if (!tree) return error("bad tree object %s", tree_name); - unpack_cb.revs = revs; - unpack_cb.symcache[0] = '\0'; memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; opts.merge = 1; opts.fn = oneway_diff; - opts.unpack_data = &unpack_cb; + opts.unpack_data = revs; opts.src_index = &the_index; opts.dst_index = NULL; @@ -486,7 +471,6 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) struct cache_entry *last = NULL; struct unpack_trees_options opts; struct tree_desc t; - struct oneway_unpack_data unpack_cb; /* * This is used by git-blame to run diff-cache internally; @@ -515,14 +499,12 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) if (!tree) die("bad tree object %s", sha1_to_hex(tree_sha1)); - unpack_cb.revs = &revs; - unpack_cb.symcache[0] = '\0'; memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; opts.merge = 1; opts.fn = oneway_diff; - opts.unpack_data = &unpack_cb; + opts.unpack_data = &revs; opts.src_index = &the_index; opts.dst_index = &the_index; -- cgit v1.2.3 From 571998921d8fd4ee674545406aabb86987921252 Mon Sep 17 00:00:00 2001 From: Kjetil Barvik Date: Mon, 9 Feb 2009 21:54:06 +0100 Subject: lstat_cache(): swap func(length, string) into func(string, length) Swap function argument pair (length, string) into (string, length) to conform with the commonly used order inside the GIT source code. Also, add a note about this fact into the coding guidelines. Signed-off-by: Kjetil Barvik Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index a41e1ec07c..a3ba20ee29 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -31,7 +31,7 @@ static int check_removed(const struct cache_entry *ce, struct stat *st) return -1; return 1; } - if (has_symlink_leading_path(ce_namelen(ce), ce->name)) + if (has_symlink_leading_path(ce->name, ce_namelen(ce))) return 1; if (S_ISDIR(st->st_mode)) { unsigned char sub[20]; -- cgit v1.2.3 From 75f3ff2eeaba820b37016f464b6d1078cb6260e2 Mon Sep 17 00:00:00 2001 From: Stephan Beyer Date: Tue, 10 Feb 2009 15:30:35 +0100 Subject: Generalize and libify index_is_dirty() to index_differs_from(...) index_is_dirty() in builtin-revert.c checks if the index is dirty. This patch generalizes this function to check if the index differs from a revision, i.e. the former index_is_dirty() behavior can now be achieved by index_differs_from("HEAD", 0). The second argument "diff_flags" allows to set further diff option flags like DIFF_OPT_IGNORE_SUBMODULES. See DIFF_OPT_* macros in diff.h for a list. index_differs_from() seems to be useful for more than builtin-revert.c, so it is moved into diff-lib.c and also used in builtin-commit.c. Yet to mention: - "rev.abbrev = 0;" can be safely removed. This has no impact on performance or functioning of neither setup_revisions() nor run_diff_index(). - rev.pending.objects is free()d because this fixes a leak. (Also see 295dd2ad "Fix memory leak in traverse_commit_list") Mentored-by: Daniel Barkalow Mentored-by: Christian Couder Signed-off-by: Stephan Beyer Signed-off-by: Junio C Hamano --- diff-lib.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index a41e1ec07c..79d0606834 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -513,3 +513,18 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) exit(128); return 0; } + +int index_differs_from(const char *def, int diff_flags) +{ + struct rev_info rev; + + init_revisions(&rev, NULL); + setup_revisions(0, NULL, &rev, def); + DIFF_OPT_SET(&rev.diffopt, QUIET); + DIFF_OPT_SET(&rev.diffopt, EXIT_WITH_STATUS); + rev.diffopt.flags |= diff_flags; + run_diff_index(&rev, 1); + if (rev.pending.alloc) + free(rev.pending.objects); + return (DIFF_OPT_TST(&rev.diffopt, HAS_CHANGES) != 0); +} -- cgit v1.2.3 From 658dd48c8572d0db49719cbef6605d384621d87c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2009 14:57:30 -0700 Subject: Avoid unnecessary 'lstat()' calls in 'get_stat_data()' When we ask get_stat_data() to get the mode and size of an index entry, we can avoid the lstat() call if we have marked the index entry as being uptodate due to earlier lstat() calls. This avoids a lot of unnecessary lstat() calls in eg 'git checkout', where the last phase shows the differences to the working tree (requiring a diff), but earlier phases have already verified the index. On the kernel repo (with a fast machine and everything cached), this changes timings of a nul 'git checkout' from - Before (best of ten): 0.14user 0.05system 0:00.19elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+13237minor)pagefaults 0swaps - After 0.11user 0.03system 0:00.15elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+13235minor)pagefaults 0swaps so it can obviously be noticeable, although equally obviously it's not a show-stopper on this particular machine. The difference is likely larger on slower machines, or with operating systems that don't do as good a job of name caching. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index ae96c64ca2..d230efc146 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -222,7 +222,7 @@ static int get_stat_data(struct cache_entry *ce, const unsigned char *sha1 = ce->sha1; unsigned int mode = ce->ce_mode; - if (!cached) { + if (!cached && !ce_uptodate(ce)) { int changed; struct stat st; changed = check_removed(ce, &st); -- cgit v1.2.3 From b65982b60876c8f5f4d3b2898d5174f4812552b1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 20 May 2009 15:57:22 -0700 Subject: Optimize "diff-index --cached" using cache-tree When running "diff-index --cached" after making a change to only a small portion of the index, there is no point unpacking unchanged subtrees into the index recursively, only to find that all entries match anyway. Tweak unpack_trees() logic that is used to read in the tree object to catch the case where the tree entry we are looking at matches the index as a whole by looking at the cache-tree. As an exercise, after modifying a few paths in the kernel tree, here are a few numbers on my Athlon 64X2 3800+: (without patch, hot cache) $ /usr/bin/time git diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+9407minor)pagefaults 0swaps (with patch, hot cache) $ /usr/bin/time ../git.git/git-diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+2446minor)pagefaults 0swaps Cold cache numbers are very impressive, but it does not matter very much in practice: (without patch, cold cache) $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches' $ /usr/bin/time git diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k 247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps (with patch, cold cache) $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches' $ /usr/bin/time ../git.git/git-diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k 18440inputs+0outputs (79major+2369minor)pagefaults 0swaps This of course helps "git status" as well. (without patch, hot cache) $ /usr/bin/time ../git.git/git-status >/dev/null 0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+5336outputs (0major+10970minor)pagefaults 0swaps (with patch, hot cache) $ /usr/bin/time ../git.git/git-status >/dev/null 0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+5336outputs (0major+3921minor)pagefaults 0swaps Signed-off-by: Junio C Hamano --- diff-lib.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index a310fb2ad0..1cb97af22d 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -446,6 +446,7 @@ int run_diff_index(struct rev_info *revs, int cached) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; + opts.diff_index_cached = cached; opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; @@ -502,6 +503,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; + opts.diff_index_cached = 1; opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = &revs; -- cgit v1.2.3 From a0919ced8a5efe938cf97c74a0f851cbbe00aaf6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 22 May 2009 23:14:25 -0700 Subject: Avoid "diff-index --cached" optimization under --find-copies-harder When find-copies-harder is in effect, the diff frontends are expected to feed all paths, not just changed paths, to the diffcore, so that copy sources can be picked up. In such a case, not descending into subtrees using the cache-tree information is simply wrong. Signed-off-by: Junio C Hamano --- diff-lib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 1cb97af22d..ae75eacbcc 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -446,7 +446,8 @@ int run_diff_index(struct rev_info *revs, int cached) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; - opts.diff_index_cached = cached; + opts.diff_index_cached = (cached && + !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)); opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; @@ -503,7 +504,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; - opts.diff_index_cached = 1; + opts.diff_index_cached = !DIFF_OPT_TST(opt, FIND_COPIES_HARDER); opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = &revs; -- cgit v1.2.3