From b6e8a3b5404606f156a14bac262d7e9adf620990 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 17 Apr 2015 18:11:04 -0400 Subject: limit_list: avoid quadratic behavior from still_interesting When we are limiting a rev-list traversal due to UNINTERESTING refs, we have to walk down the tips (both interesting and uninteresting) to find where they intersect. We keep a queue of commits to examine, pop commits off the queue one by one, and potentially add their parents. The size of the queue will naturally fluctuate based on the "width" of the history graph; i.e., the number of simultaneous lines of development. But for the most part it will stay in the same ballpark as the initial number of tips we fed, shrinking over time (as we hit common ancestors of the tips). So roughly speaking, if we start with `N` tips, we'll spend much of the time with a queue around `N` items. For each UNINTERESTING commit we pop, we call still_interesting to check whether marking its parents as UNINTERESTING has made the whole queue uninteresting (in which case we can quit early). Because the queue is stored as a linked list, this is `O(N)`, where `N` is the number of items in the queue. So processing a queue with `N` commits marked UNINTERESTING (and one or more interesting commits) will take `O(N^2)`. If you feed a lot of positive tips, this isn't a problem. They aren't UNINTERESTING, so they don't incur the still_interesting check. It also isn't a problem if you traverse from an interesting tip to some UNINTERESTING bases. We order the queue by recency, so the interesting commits stay at the front of the queue as we walk down them. The linear check can exit early as soon as it sees one interesting commit left in the queue. But if you want to know whether an older commit is reachable from a set of newer tips, we end up processing in the opposite direction: from the UNINTERESTING ones down to the interesting one. This may happen when we call: git rev-list $commits --not --all in check_everything_connected after a fetch. If we fetched something much older than most of our refs, and if we have a large number of refs, the traversal cost is dominated by the quadratic behavior. These commands simulate the connectivity check of such a fetch, when you have `$n` distinct refs in the receiver: # positive ref is 100,000 commits deep git rev-list --all | head -100000 | tail -1 >input # huge number of more recent negative refs git rev-list --all | head -$n | sed s/^/^/ >>input time git rev-list --stdin Signed-off-by: Junio C Hamano --- revision.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/revision.c b/revision.c index 2571ada6bf..06f31d6fb5 100644 --- a/revision.c +++ b/revision.c @@ -334,14 +334,24 @@ static struct commit *handle_commit(struct rev_info *revs, die("%s is unknown object", name); } -static int everybody_uninteresting(struct commit_list *orig) +static int everybody_uninteresting(struct commit_list *orig, + struct commit **interesting_cache) { struct commit_list *list = orig; + + if (*interesting_cache) { + struct commit *commit = *interesting_cache; + if (!(commit->object.flags & UNINTERESTING)) + return 0; + } + while (list) { struct commit *commit = list->item; list = list->next; if (commit->object.flags & UNINTERESTING) continue; + if (interesting_cache) + *interesting_cache = commit; return 0; } return 1; @@ -929,7 +939,8 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs) /* How many extra uninteresting commits we want to see.. */ #define SLOP 5 -static int still_interesting(struct commit_list *src, unsigned long date, int slop) +static int still_interesting(struct commit_list *src, unsigned long date, int slop, + struct commit **interesting_cache) { /* * No source list at all? We're definitely done.. @@ -948,7 +959,7 @@ static int still_interesting(struct commit_list *src, unsigned long date, int sl * Does the source list still have interesting commits in * it? Definitely not done.. */ - if (!everybody_uninteresting(src)) + if (!everybody_uninteresting(src, interesting_cache)) return SLOP; /* Ok, we're closing in.. */ @@ -1067,6 +1078,7 @@ static int limit_list(struct rev_info *revs) struct commit_list *newlist = NULL; struct commit_list **p = &newlist; struct commit_list *bottom = NULL; + struct commit *interesting_cache = NULL; if (revs->ancestry_path) { bottom = collect_bottom_commits(list); @@ -1083,6 +1095,9 @@ static int limit_list(struct rev_info *revs) list = list->next; free(entry); + if (commit == interesting_cache) + interesting_cache = NULL; + if (revs->max_age != -1 && (commit->date < revs->max_age)) obj->flags |= UNINTERESTING; if (add_parents_to_list(revs, commit, &list, NULL) < 0) @@ -1091,7 +1106,7 @@ static int limit_list(struct rev_info *revs) mark_parents_uninteresting(commit); if (revs->show_all) p = &commit_list_insert(commit, p)->next; - slop = still_interesting(list, date, slop); + slop = still_interesting(list, date, slop, &interesting_cache); if (slop) continue; /* If showing all, add the whole pending list to the end */ -- cgit v1.2.3