diff options
-rw-r--r-- | Documentation/config.txt | 23 | ||||
-rw-r--r-- | fsck.c | 60 | ||||
-rw-r--r-- | fsck.h | 8 | ||||
-rwxr-xr-x | t/perf/p1450-fsck.sh | 13 | ||||
-rwxr-xr-x | t/perf/p1451-fsck-skip-list.sh | 40 | ||||
-rwxr-xr-x | t/t5504-fetch-receive-strict.sh | 72 |
6 files changed, 171 insertions, 45 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index fa52b19dff..1546833213 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1567,12 +1567,16 @@ doing the same for `receive.fsck.<msg-id>` and `fetch.fsck.<msg-id>` will only cause git to warn. fsck.skipList:: - The path to a sorted list of object names (i.e. one SHA-1 per + The path to a list of object names (i.e. one unabbreviated SHA-1 per line) that are known to be broken in a non-fatal way and should - be ignored. This feature is useful when an established project - should be accepted despite early commits containing errors that - can be safely ignored such as invalid committer email addresses. - Note: corrupt objects cannot be skipped with this setting. + be ignored. On versions of Git 2.20 and later comments ('#'), empty + lines, and any leading and trailing whitespace is ignored. Everything + but a SHA-1 per line will error out on older versions. ++ +This feature is useful when an established project should be accepted +despite early commits containing errors that can be safely ignored +such as invalid committer email addresses. Note: corrupt objects +cannot be skipped with this setting. + Like `fsck.<msg-id>` this variable has corresponding `receive.fsck.skipList` and `fetch.fsck.skipList` variants. @@ -1582,6 +1586,15 @@ Unlike variables like `color.ui` and `core.editor` the fall back on the `fsck.skipList` configuration if they aren't set. To uniformly configure the same fsck settings in different circumstances all three of them they must all set to the same values. ++ +Older versions of Git (before 2.20) documented that the object names +list should be sorted. This was never a requirement, the object names +could appear in any order, but when reading the list we tracked whether +the list was sorted for the purposes of an internal binary search +implementation, which could save itself some work with an already sorted +list. Unless you had a humongous list there was no reason to go out of +your way to pre-sort the list. After Git version 2.20 a hash implementation +is used instead, so there's now no reason to pre-sort the list. gc.aggressiveDepth:: The depth parameter used in the delta compression @@ -10,7 +10,6 @@ #include "fsck.h" #include "refs.h" #include "utf8.h" -#include "sha1-array.h" #include "decorate.h" #include "oidset.h" #include "packfile.h" @@ -184,40 +183,37 @@ static int fsck_msg_type(enum fsck_msg_id msg_id, static void init_skiplist(struct fsck_options *options, const char *path) { - static struct oid_array skiplist = OID_ARRAY_INIT; - int sorted, fd; - char buffer[GIT_MAX_HEXSZ + 1]; + FILE *fp; + struct strbuf sb = STRBUF_INIT; struct object_id oid; - if (options->skiplist) - sorted = options->skiplist->sorted; - else { - sorted = 1; - options->skiplist = &skiplist; - } - - fd = open(path, O_RDONLY); - if (fd < 0) + fp = fopen(path, "r"); + if (!fp) die("Could not open skip list: %s", path); - for (;;) { + while (!strbuf_getline(&sb, fp)) { const char *p; - int result = read_in_full(fd, buffer, sizeof(buffer)); - if (result < 0) - die_errno("Could not read '%s'", path); - if (!result) - break; - if (parse_oid_hex(buffer, &oid, &p) || *p != '\n') - die("Invalid SHA-1: %s", buffer); - oid_array_append(&skiplist, &oid); - if (sorted && skiplist.nr > 1 && - oidcmp(&skiplist.oid[skiplist.nr - 2], - &oid) > 0) - sorted = 0; - } - close(fd); + const char *hash; - if (sorted) - skiplist.sorted = 1; + /* + * Allow trailing comments, leading whitespace + * (including before commits), and empty or whitespace + * only lines. + */ + hash = strchr(sb.buf, '#'); + if (hash) + strbuf_setlen(&sb, hash - sb.buf); + strbuf_trim(&sb); + if (!sb.len) + continue; + + if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0') + die("Invalid SHA-1: %s", sb.buf); + oidset_insert(&options->skiplist, &oid); + } + if (ferror(fp)) + die_errno("Could not read '%s'", path); + fclose(fp); + strbuf_release(&sb); } static int parse_msg_type(const char *str) @@ -322,9 +318,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id) static int object_on_skiplist(struct fsck_options *opts, struct object *obj) { - if (opts && opts->skiplist && obj) - return oid_array_lookup(opts->skiplist, &obj->oid) >= 0; - return 0; + return opts && obj && oidset_contains(&opts->skiplist, &obj->oid); } __attribute__((format (printf, 4, 5))) @@ -1,6 +1,8 @@ #ifndef GIT_FSCK_H #define GIT_FSCK_H +#include "oidset.h" + #define FSCK_ERROR 1 #define FSCK_WARN 2 #define FSCK_IGNORE 3 @@ -35,12 +37,12 @@ struct fsck_options { fsck_error error_func; unsigned strict:1; int *msg_type; - struct oid_array *skiplist; + struct oidset skiplist; struct decoration *object_names; }; -#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL } -#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL } +#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT } +#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT } /* descend in all linked child objects * the return value is: diff --git a/t/perf/p1450-fsck.sh b/t/perf/p1450-fsck.sh new file mode 100755 index 0000000000..ae1b84198b --- /dev/null +++ b/t/perf/p1450-fsck.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +test_description='Test fsck performance' + +. ./perf-lib.sh + +test_perf_large_repo + +test_perf 'fsck' ' + git fsck +' + +test_done diff --git a/t/perf/p1451-fsck-skip-list.sh b/t/perf/p1451-fsck-skip-list.sh new file mode 100755 index 0000000000..c2b97d2487 --- /dev/null +++ b/t/perf/p1451-fsck-skip-list.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +test_description='Test fsck skipList performance' + +. ./perf-lib.sh + +test_perf_fresh_repo + +n=1000000 + +test_expect_success "setup $n bad commits" ' + for i in $(test_seq 1 $n) + do + echo "commit refs/heads/master" && + echo "committer C <c@example.com> 1234567890 +0000" && + echo "data <<EOF" && + echo "$i.Q." && + echo "EOF" + done | q_to_nul | git fast-import +' + +skip=0 +while test $skip -le $n +do + test_expect_success "create skipList for $skip bad commits" ' + git log --format=%H --max-count=$skip | + sort >skiplist + ' + + test_perf "fsck with $skip skipped bad commits" ' + git -c fsck.skipList=skiplist fsck + ' + + case $skip in + 0) skip=1 ;; + *) skip=${skip}0 ;; + esac +done + +test_done diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index 62f3569891..7bc706873c 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -133,6 +133,34 @@ committer Bugs Bunny <bugs@bun.ni> 1234567890 +0000 This commit object intentionally broken EOF +test_expect_success 'setup bogus commit' ' + commit="$(git hash-object -t commit -w --stdin <bogus-commit)" +' + +test_expect_success 'fsck with no skipList input' ' + test_must_fail git fsck 2>err && + test_i18ngrep "missingEmail" err +' + +test_expect_success 'setup sorted and unsorted skipLists' ' + cat >SKIP.unsorted <<-EOF && + 0000000000000000000000000000000000000004 + 0000000000000000000000000000000000000002 + $commit + 0000000000000000000000000000000000000001 + 0000000000000000000000000000000000000003 + EOF + sort SKIP.unsorted >SKIP.sorted +' + +test_expect_success 'fsck with sorted skipList' ' + git -c fsck.skipList=SKIP.sorted fsck +' + +test_expect_success 'fsck with unsorted skipList' ' + git -c fsck.skipList=SKIP.unsorted fsck +' + test_expect_success 'fsck with invalid or bogus skipList input' ' git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck && test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err && @@ -141,8 +169,47 @@ test_expect_success 'fsck with invalid or bogus skipList input' ' test_i18ngrep "Invalid SHA-1: \[core\]" err ' +test_expect_success 'fsck with other accepted skipList input (comments & empty lines)' ' + cat >SKIP.with-comment <<-EOF && + # Some bad commit + 0000000000000000000000000000000000000001 + EOF + test_must_fail git -c fsck.skipList=SKIP.with-comment fsck 2>err-with-comment && + test_i18ngrep "missingEmail" err-with-comment && + cat >SKIP.with-empty-line <<-EOF && + 0000000000000000000000000000000000000001 + + 0000000000000000000000000000000000000002 + EOF + test_must_fail git -c fsck.skipList=SKIP.with-empty-line fsck 2>err-with-empty-line && + test_i18ngrep "missingEmail" err-with-empty-line +' + +test_expect_success 'fsck no garbage output from comments & empty lines errors' ' + test_line_count = 1 err-with-comment && + test_line_count = 1 err-with-empty-line +' + +test_expect_success 'fsck with invalid abbreviated skipList input' ' + echo $commit | test_copy_bytes 20 >SKIP.abbreviated && + test_must_fail git -c fsck.skipList=SKIP.abbreviated fsck 2>err-abbreviated && + test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated +' + +test_expect_success 'fsck with exhaustive accepted skipList input (various types of comments etc.)' ' + >SKIP.exhaustive && + echo "# A commented line" >>SKIP.exhaustive && + echo "" >>SKIP.exhaustive && + echo " " >>SKIP.exhaustive && + echo " # Comment after whitespace" >>SKIP.exhaustive && + echo "$commit # Our bad commit (with leading whitespace and trailing comment)" >>SKIP.exhaustive && + echo "# Some bad commit (leading whitespace)" >>SKIP.exhaustive && + echo " 0000000000000000000000000000000000000001" >>SKIP.exhaustive && + git -c fsck.skipList=SKIP.exhaustive fsck 2>err && + test_must_be_empty err +' + test_expect_success 'push with receive.fsck.skipList' ' - commit="$(git hash-object -t commit -w --stdin <bogus-commit)" && git push . $commit:refs/heads/bogus && rm -rf dst && git init dst && @@ -169,7 +236,6 @@ test_expect_success 'push with receive.fsck.skipList' ' ' test_expect_success 'fetch with fetch.fsck.skipList' ' - commit="$(git hash-object -t commit -w --stdin <bogus-commit)" && refspec=refs/heads/bogus:refs/heads/bogus && git push . $commit:refs/heads/bogus && rm -rf dst && @@ -204,7 +270,6 @@ test_expect_success 'fsck.<unknownmsg-id> dies' ' ' test_expect_success 'push with receive.fsck.missingEmail=warn' ' - commit="$(git hash-object -t commit -w --stdin <bogus-commit)" && git push . $commit:refs/heads/bogus && rm -rf dst && git init dst && @@ -232,7 +297,6 @@ test_expect_success 'push with receive.fsck.missingEmail=warn' ' ' test_expect_success 'fetch with fetch.fsck.missingEmail=warn' ' - commit="$(git hash-object -t commit -w --stdin <bogus-commit)" && refspec=refs/heads/bogus:refs/heads/bogus && git push . $commit:refs/heads/bogus && rm -rf dst && |