From ffc4b8012d9a4f92ef238ff72c0d15e9e1b402ed Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 11 Jun 2011 19:04:08 +0000 Subject: tag: speed up --contains calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we want to know if commit A contains commit B (or any one of a set of commits, B through Z), we generally calculate the merge bases and see if B is a merge base of A (or for a set, if any of the commits B through Z have that property). When we are going to check a series of commits A1 through An to see whether each contains B (e.g., because we are deciding which tags to show with "git tag --contains"), we do a series of merge base calculations. This can be very expensive, as we repeat a lot of traversal work. Instead, let's leverage the fact that we are going to use the same --contains list for each tag, and mark areas of the commit graph is definitely containing those commits, or definitely not containing those commits. Later tags can then stop traversing as soon as they see a previously calculated answer. This sped up "git tag --contains HEAD~200" in the linux-2.6 repository from: real 0m15.417s user 0m15.197s sys 0m0.220s to: real 0m5.329s user 0m5.144s sys 0m0.184s Signed-off-by: Jeff King Signed-off-by: Junio C Hamano Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/tag.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/tag.c b/builtin/tag.c index d311491e49..f7a7943c9d 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -12,6 +12,8 @@ #include "tag.h" #include "run-command.h" #include "parse-options.h" +#include "diff.h" +#include "revision.h" static const char * const git_tag_usage[] = { "git tag [-a|-s|-u ] [-f] [-m |-F ] []", @@ -31,6 +33,48 @@ struct tag_filter { #define PGP_SIGNATURE "-----BEGIN PGP SIGNATURE-----" +static int in_commit_list(const struct commit_list *want, struct commit *c) +{ + for (; want; want = want->next) + if (!hashcmp(want->item->object.sha1, c->object.sha1)) + return 1; + return 0; +} + +static int contains_recurse(struct commit *candidate, + const struct commit_list *want) +{ + struct commit_list *p; + + /* was it previously marked as containing a want commit? */ + if (candidate->object.flags & TMP_MARK) + return 1; + /* or marked as not possibly containing a want commit? */ + if (candidate->object.flags & UNINTERESTING) + return 0; + /* or are we it? */ + if (in_commit_list(want, candidate)) + return 1; + + if (parse_commit(candidate) < 0) + return 0; + + /* Otherwise recurse and mark ourselves for future traversals. */ + for (p = candidate->parents; p; p = p->next) { + if (contains_recurse(p->item, want)) { + candidate->object.flags |= TMP_MARK; + return 1; + } + } + candidate->object.flags |= UNINTERESTING; + return 0; +} + +static int contains(struct commit *candidate, const struct commit_list *want) +{ + return contains_recurse(candidate, want); +} + static int show_reference(const char *refname, const unsigned char *sha1, int flag, void *cb_data) { @@ -49,7 +93,7 @@ static int show_reference(const char *refname, const unsigned char *sha1, commit = lookup_commit_reference_gently(sha1, 1); if (!commit) return 0; - if (!is_descendant_of(commit, filter->with_commit)) + if (!contains(commit, filter->with_commit)) return 0; } -- cgit v1.2.3 From 0c811a7a6f93691604d48b1c13a6868c89971528 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 11 Jun 2011 19:04:09 +0000 Subject: limit "contains" traversals based on commit timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When looking for commits that contain other commits (e.g., via "git tag --contains"), we can end up traversing useless portions of the graph. For example, if I am looking for a tag that contains a commit made last week, there is not much point in traversing portions of the history graph made five years ago. This optimization can provide massive speedups. For example, doing "git tag --contains HEAD~200" in the linux-2.6 repository goes from: real 0m5.302s user 0m5.116s sys 0m0.184s to: real 0m0.030s user 0m0.020s sys 0m0.008s The downside is that we will no longer find some answers in the face of extreme clock skew, as we will stop the traversal early when seeing commits skewed too far into the past. Name-rev already implements a similar optimization, using a "slop" of one day to allow for a certain amount of clock skew in commit timestamps. This patch introduces a "core.clockskew" variable, which allows specifying the allowable amount of clock skew in seconds. For safety, it defaults to "none", causing a full traversal (i.e., no change in behavior from previous versions). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/tag.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/tag.c b/builtin/tag.c index f7a7943c9d..72140b3986 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -25,6 +25,8 @@ static const char * const git_tag_usage[] = { static char signingkey[1000]; +static int core_clock_skew = -1; + struct tag_filter { const char *pattern; int lines; @@ -42,7 +44,8 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) } static int contains_recurse(struct commit *candidate, - const struct commit_list *want) + const struct commit_list *want, + unsigned long cutoff) { struct commit_list *p; @@ -59,9 +62,13 @@ static int contains_recurse(struct commit *candidate, if (parse_commit(candidate) < 0) return 0; + /* stop searching if we go too far back in time */ + if (candidate->date < cutoff) + return 0; + /* Otherwise recurse and mark ourselves for future traversals. */ for (p = candidate->parents; p; p = p->next) { - if (contains_recurse(p->item, want)) { + if (contains_recurse(p->item, want, cutoff)) { candidate->object.flags |= TMP_MARK; return 1; } @@ -72,7 +79,22 @@ static int contains_recurse(struct commit *candidate, static int contains(struct commit *candidate, const struct commit_list *want) { - return contains_recurse(candidate, want); + unsigned long cutoff = 0; + + if (core_clock_skew >= 0) { + const struct commit_list *c; + unsigned long min_date = ULONG_MAX; + for (c = want; c; c = c->next) { + if (parse_commit(c->item) < 0) + continue; + if (c->item->date < min_date) + min_date = c->item->date; + } + if (min_date > core_clock_skew) + cutoff = min_date - core_clock_skew; + } + + return contains_recurse(candidate, want, cutoff); } static int show_reference(const char *refname, const unsigned char *sha1, @@ -279,6 +301,14 @@ static int git_tag_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.clockskew")) { + if (!value || !strcmp(value, "none")) + core_clock_skew = -1; + else + core_clock_skew = git_config_int(var, value); + return 0; + } + return git_default_config(var, value, cb); } -- cgit v1.2.3 From de9f14e26a179462b3d4ed9de64ba7dd9fdbfa02 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 11 Jun 2011 19:04:10 +0000 Subject: default core.clockskew variable to one day MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the slop value used by name-rev, so presumably is a reasonable default. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/tag.c b/builtin/tag.c index 72140b3986..e468696691 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -25,7 +25,7 @@ static const char * const git_tag_usage[] = { static char signingkey[1000]; -static int core_clock_skew = -1; +static int core_clock_skew = 86400; struct tag_filter { const char *pattern; -- cgit v1.2.3 From 188c35e36d22c39074a8b0ee411483a405065a9a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 11 Jun 2011 19:04:11 +0000 Subject: git skew: a tool to find how big a clock skew exists in the history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > As you probably guessed from the specificity of the number, I wrote a > short program to actually traverse and find the worst skew. It takes > about 5 seconds to run (unsurprisingly, since it is doing the same full > traversal that we end up doing in the above numbers). So we could > "autoskew" by setting up the configuration on clone, and then > periodically updating it as part of "git gc". This patch doesn't implement auto-detection of skew, but is the program I used to calculate, and would provide the basis for such auto-detection. It would be interesting to see average skew numbers for popular repositories. You can run it as "git skew --all". Signed-off-by: Junio C Hamano Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/skew.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 builtin/skew.c (limited to 'builtin') diff --git a/builtin/skew.c b/builtin/skew.c new file mode 100644 index 0000000000..1046f5f546 --- /dev/null +++ b/builtin/skew.c @@ -0,0 +1,50 @@ +#include "cache.h" +#include "commit.h" +#include "diff.h" +#include "revision.h" + +unsigned long worst_skew = 0; + +static void check_skew_recurse(struct commit *c, unsigned long when) +{ + struct commit_list *p; + + if (c->object.flags & SEEN) + return; + c->object.flags |= SEEN; + + if (parse_commit(c) < 0) + return; + + if (c->date > when) { + unsigned long skew = c->date - when; + if (skew > worst_skew) + worst_skew = skew; + } + + for (p = c->parents; p; p = p->next) + check_skew_recurse(p->item, c->date < when ? c->date : when); +} + +static void check_skew(struct commit *c) +{ + check_skew_recurse(c, time(NULL)); +} + +int cmd_skew(int argc, const char **argv, const char *prefix) { + struct rev_info revs; + int i; + + git_config(git_default_config, NULL); + init_revisions(&revs, prefix); + argc = setup_revisions(argc, argv, &revs, NULL); + + for (i = 0; i < revs.pending.nr; i++) { + struct object *o = revs.pending.objects[i].item; + if (o->type == OBJ_COMMIT) + check_skew((struct commit *)o); + } + + printf("%lu\n", worst_skew); + return 0; +} -- cgit v1.2.3 From c6d72c49720aa333e9c646f447147e8e546df174 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 14 Jul 2011 11:02:06 -0700 Subject: Revert clock-skew based attempt to optimize tag --contains traversal Signed-off-by: Junio C Hamano --- builtin/skew.c | 50 -------------------------------------------------- builtin/tag.c | 36 +++--------------------------------- 2 files changed, 3 insertions(+), 83 deletions(-) delete mode 100644 builtin/skew.c (limited to 'builtin') diff --git a/builtin/skew.c b/builtin/skew.c deleted file mode 100644 index 1046f5f546..0000000000 --- a/builtin/skew.c +++ /dev/null @@ -1,50 +0,0 @@ -#include "cache.h" -#include "commit.h" -#include "diff.h" -#include "revision.h" - -unsigned long worst_skew = 0; - -static void check_skew_recurse(struct commit *c, unsigned long when) -{ - struct commit_list *p; - - if (c->object.flags & SEEN) - return; - c->object.flags |= SEEN; - - if (parse_commit(c) < 0) - return; - - if (c->date > when) { - unsigned long skew = c->date - when; - if (skew > worst_skew) - worst_skew = skew; - } - - for (p = c->parents; p; p = p->next) - check_skew_recurse(p->item, c->date < when ? c->date : when); -} - -static void check_skew(struct commit *c) -{ - check_skew_recurse(c, time(NULL)); -} - -int cmd_skew(int argc, const char **argv, const char *prefix) { - struct rev_info revs; - int i; - - git_config(git_default_config, NULL); - init_revisions(&revs, prefix); - argc = setup_revisions(argc, argv, &revs, NULL); - - for (i = 0; i < revs.pending.nr; i++) { - struct object *o = revs.pending.objects[i].item; - if (o->type == OBJ_COMMIT) - check_skew((struct commit *)o); - } - - printf("%lu\n", worst_skew); - return 0; -} diff --git a/builtin/tag.c b/builtin/tag.c index e468696691..f7a7943c9d 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -25,8 +25,6 @@ static const char * const git_tag_usage[] = { static char signingkey[1000]; -static int core_clock_skew = 86400; - struct tag_filter { const char *pattern; int lines; @@ -44,8 +42,7 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) } static int contains_recurse(struct commit *candidate, - const struct commit_list *want, - unsigned long cutoff) + const struct commit_list *want) { struct commit_list *p; @@ -62,13 +59,9 @@ static int contains_recurse(struct commit *candidate, if (parse_commit(candidate) < 0) return 0; - /* stop searching if we go too far back in time */ - if (candidate->date < cutoff) - return 0; - /* Otherwise recurse and mark ourselves for future traversals. */ for (p = candidate->parents; p; p = p->next) { - if (contains_recurse(p->item, want, cutoff)) { + if (contains_recurse(p->item, want)) { candidate->object.flags |= TMP_MARK; return 1; } @@ -79,22 +72,7 @@ static int contains_recurse(struct commit *candidate, static int contains(struct commit *candidate, const struct commit_list *want) { - unsigned long cutoff = 0; - - if (core_clock_skew >= 0) { - const struct commit_list *c; - unsigned long min_date = ULONG_MAX; - for (c = want; c; c = c->next) { - if (parse_commit(c->item) < 0) - continue; - if (c->item->date < min_date) - min_date = c->item->date; - } - if (min_date > core_clock_skew) - cutoff = min_date - core_clock_skew; - } - - return contains_recurse(candidate, want, cutoff); + return contains_recurse(candidate, want); } static int show_reference(const char *refname, const unsigned char *sha1, @@ -301,14 +279,6 @@ static int git_tag_config(const char *var, const char *value, void *cb) return 0; } - if (!strcmp(var, "core.clockskew")) { - if (!value || !strcmp(value, "none")) - core_clock_skew = -1; - else - core_clock_skew = git_config_int(var, value); - return 0; - } - return git_default_config(var, value, cb); } -- cgit v1.2.3