diff options
author | Junio C Hamano <gitster@pobox.com> | 2021-05-16 21:05:23 +0900 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2021-05-16 21:05:23 +0900 |
commit | a737e1f1d25747481bd4925555006f569e461117 (patch) | |
tree | f7f7cb6a78b9e67f9eb6e9c35cdc85758c0ebd25 /t | |
parent | Merge branch 'jt/push-negotiation' (diff) | |
parent | ci: run test round with parallel-checkout enabled (diff) | |
download | tgif-a737e1f1d25747481bd4925555006f569e461117.tar.xz |
Merge branch 'mt/parallel-checkout-part-3'
The final part of "parallel checkout".
* mt/parallel-checkout-part-3:
ci: run test round with parallel-checkout enabled
parallel-checkout: add tests related to .gitattributes
t0028: extract encoding helpers to lib-encoding.sh
parallel-checkout: add tests related to path collisions
parallel-checkout: add tests for basic operations
checkout-index: add parallel checkout support
builtin/checkout.c: complete parallel checkout support
make_transient_cache_entry(): optionally alloc from mem_pool
Diffstat (limited to 't')
-rw-r--r-- | t/README | 4 | ||||
-rw-r--r-- | t/lib-encoding.sh | 25 | ||||
-rw-r--r-- | t/lib-parallel-checkout.sh | 45 | ||||
-rwxr-xr-x | t/t0028-working-tree-encoding.sh | 25 | ||||
-rwxr-xr-x | t/t2080-parallel-checkout-basics.sh | 229 | ||||
-rwxr-xr-x | t/t2081-parallel-checkout-collisions.sh | 162 | ||||
-rwxr-xr-x | t/t2082-parallel-checkout-attributes.sh | 194 |
7 files changed, 660 insertions, 24 deletions
@@ -439,6 +439,10 @@ GIT_TEST_WRITE_REV_INDEX=<boolean>, when true enables the GIT_TEST_SPARSE_INDEX=<boolean>, when true enables index writes to use the sparse-index format by default. +GIT_TEST_CHECKOUT_WORKERS=<n> overrides the 'checkout.workers' setting +to <n> and 'checkout.thresholdForParallelism' to 0, forcing the +execution of the parallel-checkout code. + Naming Tests ------------ diff --git a/t/lib-encoding.sh b/t/lib-encoding.sh new file mode 100644 index 0000000000..2dabc8c73e --- /dev/null +++ b/t/lib-encoding.sh @@ -0,0 +1,25 @@ +# Encoding helpers + +test_lazy_prereq NO_UTF16_BOM ' + test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) = 6 +' + +test_lazy_prereq NO_UTF32_BOM ' + test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) = 12 +' + +write_utf16 () { + if test_have_prereq NO_UTF16_BOM + then + printf '\376\377' + fi && + iconv -f UTF-8 -t UTF-16 +} + +write_utf32 () { + if test_have_prereq NO_UTF32_BOM + then + printf '\0\0\376\377' + fi && + iconv -f UTF-8 -t UTF-32 +} diff --git a/t/lib-parallel-checkout.sh b/t/lib-parallel-checkout.sh new file mode 100644 index 0000000000..21f5759732 --- /dev/null +++ b/t/lib-parallel-checkout.sh @@ -0,0 +1,45 @@ +# Helpers for tests invoking parallel-checkout + +# Parallel checkout tests need full control of the number of workers +unset GIT_TEST_CHECKOUT_WORKERS + +set_checkout_config () { + if test $# -ne 2 + then + BUG "usage: set_checkout_config <workers> <threshold>" + fi && + + test_config_global checkout.workers $1 && + test_config_global checkout.thresholdForParallelism $2 +} + +# Run "${@:2}" and check that $1 checkout workers were used +test_checkout_workers () { + if test $# -lt 2 + then + BUG "too few arguments to test_checkout_workers" + fi && + + local expected_workers=$1 && + shift && + + local trace_file=trace-test-checkout-workers && + rm -f "$trace_file" && + GIT_TRACE2="$(pwd)/$trace_file" "$@" 2>&8 && + + local workers=$(grep "child_start\[..*\] git checkout--worker" "$trace_file" | 
wc -l) && + test $workers -eq $expected_workers && + rm "$trace_file" +} 8>&2 2>&4 + +# Verify that both the working tree and the index were created correctly +verify_checkout () { + if test $# -ne 1 + then + BUG "usage: verify_checkout <repository path>" + fi && + + git -C "$1" diff-index --ignore-submodules=none --exit-code HEAD -- && + git -C "$1" status --porcelain >"$1".status && + test_must_be_empty "$1".status +} diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh index f970a9806b..82905a2156 100755 --- a/t/t0028-working-tree-encoding.sh +++ b/t/t0028-working-tree-encoding.sh @@ -6,33 +6,10 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. "$TEST_DIRECTORY/lib-encoding.sh" GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING -test_lazy_prereq NO_UTF16_BOM ' - test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) = 6 -' - -test_lazy_prereq NO_UTF32_BOM ' - test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) = 12 -' - -write_utf16 () { - if test_have_prereq NO_UTF16_BOM - then - printf '\376\377' - fi && - iconv -f UTF-8 -t UTF-16 -} - -write_utf32 () { - if test_have_prereq NO_UTF32_BOM - then - printf '\0\0\376\377' - fi && - iconv -f UTF-8 -t UTF-32 -} - test_expect_success 'setup test files' ' git config core.eol lf && diff --git a/t/t2080-parallel-checkout-basics.sh b/t/t2080-parallel-checkout-basics.sh new file mode 100755 index 0000000000..7087818550 --- /dev/null +++ b/t/t2080-parallel-checkout-basics.sh @@ -0,0 +1,229 @@ +#!/bin/sh + +test_description='parallel-checkout basics + +Ensure that parallel-checkout basically works on clone and checkout, spawning +the required number of workers and correctly populating both the index and the +working tree. +' + +TEST_NO_CREATE_REPO=1 +. ./test-lib.sh +. 
"$TEST_DIRECTORY/lib-parallel-checkout.sh" + +# Test parallel-checkout with a branch switch containing a variety of file +# creations, deletions, and modifications, involving different entry types. +# The branches B1 and B2 have the following paths: +# +# B1 B2 +# a/a (file) a (file) +# b (file) b/b (file) +# +# c/c (file) c (symlink) +# d (symlink) d/d (file) +# +# e/e (file) e (submodule) +# f (submodule) f/f (file) +# +# g (submodule) g (symlink) +# h (symlink) h (submodule) +# +# Additionally, the following paths are present on both branches, but with +# different contents: +# +# i (file) i (file) +# j (symlink) j (symlink) +# k (submodule) k (submodule) +# +# And the following paths are only present in one of the branches: +# +# l/l (file) - +# - m/m (file) +# +test_expect_success 'setup repo for checkout with various types of changes' ' + git init sub && + ( + cd sub && + git checkout -b B2 && + echo B2 >file && + git add file && + git commit -m file && + + git checkout -b B1 && + echo B1 >file && + git add file && + git commit -m file + ) && + + git init various && + ( + cd various && + + git checkout -b B1 && + mkdir a c e && + echo a/a >a/a && + echo b >b && + echo c/c >c/c && + test_ln_s_add c d && + echo e/e >e/e && + git submodule add ../sub f && + git submodule add ../sub g && + test_ln_s_add c h && + + echo "B1 i" >i && + test_ln_s_add c j && + git submodule add -b B1 ../sub k && + mkdir l && + echo l/l >l/l && + + git add . && + git commit -m B1 && + + git checkout -b B2 && + git rm -rf :^.gitmodules :^k && + mkdir b d f && + echo a >a && + echo b/b >b/b && + test_ln_s_add b c && + echo d/d >d/d && + git submodule add ../sub e && + echo f/f >f/f && + test_ln_s_add b g && + git submodule add ../sub h && + + echo "B2 i" >i && + test_ln_s_add b j && + git -C k checkout B2 && + mkdir m && + echo m/m >m/m && + + git add . 
&& + git commit -m B2 && + + git checkout --recurse-submodules B1 + ) +' + +for mode in sequential parallel sequential-fallback +do + case $mode in + sequential) workers=1 threshold=0 expected_workers=0 ;; + parallel) workers=2 threshold=0 expected_workers=2 ;; + sequential-fallback) workers=2 threshold=100 expected_workers=0 ;; + esac + + test_expect_success "$mode checkout" ' + repo=various_$mode && + cp -R various $repo && + + # The just copied files have more recent timestamps than their + # associated index entries. So refresh the cached timestamps + # to avoid an "entry not up-to-date" error from `git checkout`. + # We only have to do this for the submodules as `git checkout` + # will already refresh the superproject index before performing + # the up-to-date check. + # + git -C $repo submodule foreach "git update-index --refresh" && + + set_checkout_config $workers $threshold && + test_checkout_workers $expected_workers \ + git -C $repo checkout --recurse-submodules B2 && + verify_checkout $repo + ' +done + +for mode in parallel sequential-fallback +do + case $mode in + parallel) workers=2 threshold=0 expected_workers=2 ;; + sequential-fallback) workers=2 threshold=100 expected_workers=0 ;; + esac + + test_expect_success "$mode checkout on clone" ' + repo=various_${mode}_clone && + set_checkout_config $workers $threshold && + test_checkout_workers $expected_workers \ + git clone --recurse-submodules --branch B2 various $repo && + verify_checkout $repo + ' +done + +# Just to be paranoid, actually compare the working trees' contents directly. +test_expect_success 'compare the working trees' ' + rm -rf various_*/.git && + rm -rf various_*/*/.git && + + # We use `git diff` instead of `diff -r` because the latter would + # follow symlinks, and not all `diff` implementations support the + # `--no-dereference` option. 
+ # + git diff --no-index various_sequential various_parallel && + git diff --no-index various_sequential various_parallel_clone && + git diff --no-index various_sequential various_sequential-fallback && + git diff --no-index various_sequential various_sequential-fallback_clone +' + +# Currently, each submodule is checked out in a separate child process, but +# these subprocesses must also be able to use parallel checkout workers to +# write the submodules' entries. +test_expect_success 'submodules can use parallel checkout' ' + set_checkout_config 2 0 && + git init super && + ( + cd super && + git init sub && + test_commit -C sub A && + test_commit -C sub B && + git submodule add ./sub && + git commit -m sub && + rm sub/* && + test_checkout_workers 2 git checkout --recurse-submodules . + ) +' + +test_expect_success 'parallel checkout respects --[no]-force' ' + set_checkout_config 2 0 && + git init dirty && + ( + cd dirty && + mkdir D && + test_commit D/F && + test_commit F && + + rm -rf D && + echo changed >D && + echo changed >F.t && + + # We expect 0 workers because there is nothing to be done + test_checkout_workers 0 git checkout HEAD && + test_path_is_file D && + grep changed D && + grep changed F.t && + + test_checkout_workers 2 git checkout --force HEAD && + test_path_is_dir D && + grep D/F D/F.t && + grep F F.t + ) +' + +test_expect_success SYMLINKS 'parallel checkout checks for symlinks in leading dirs' ' + set_checkout_config 2 0 && + git init symlinks && + ( + cd symlinks && + mkdir D untracked && + # Commit 2 files to have enough work for 2 parallel workers + test_commit D/A && + test_commit D/B && + rm -rf D && + ln -s untracked D && + + test_checkout_workers 2 git checkout --force HEAD && + ! 
test -h D && + grep D/A D/A.t && + grep D/B D/B.t + ) +' + +test_done diff --git a/t/t2081-parallel-checkout-collisions.sh b/t/t2081-parallel-checkout-collisions.sh new file mode 100755 index 0000000000..f6fcfc0c1e --- /dev/null +++ b/t/t2081-parallel-checkout-collisions.sh @@ -0,0 +1,162 @@ +#!/bin/sh + +test_description="path collisions during parallel checkout + +Parallel checkout must detect path collisions to: + +1) Avoid racily writing to different paths that represent the same file on disk. +2) Report the colliding entries on clone. + +The tests in this file exercise parallel checkout's collision detection code in +both these mechanics. +" + +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-parallel-checkout.sh" + +TEST_ROOT="$PWD" + +test_expect_success CASE_INSENSITIVE_FS 'setup' ' + empty_oid=$(git hash-object -w --stdin </dev/null) && + cat >objs <<-EOF && + 100644 $empty_oid FILE_X + 100644 $empty_oid FILE_x + 100644 $empty_oid file_X + 100644 $empty_oid file_x + EOF + git update-index --index-info <objs && + git commit -m "colliding files" && + git tag basename_collision && + + write_script "$TEST_ROOT"/logger_script <<-\EOF + echo "$@" >>filter.log + EOF +' + +test_workers_in_event_trace () +{ + test $1 -eq $(grep ".event.:.child_start..*checkout--worker" $2 | wc -l) +} + +test_expect_success CASE_INSENSITIVE_FS 'worker detects basename collision' ' + GIT_TRACE2_EVENT="$(pwd)/trace" git \ + -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \ + checkout . 
&& + + test_workers_in_event_trace 2 trace && + collisions=$(grep -i "category.:.pcheckout.,.key.:.collision/basename.,.value.:.file_x.}" trace | wc -l) && + test $collisions -eq 3 +' + +test_expect_success CASE_INSENSITIVE_FS 'worker detects dirname collision' ' + test_config filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" && + empty_oid=$(git hash-object -w --stdin </dev/null) && + + # By setting a filter command to "a", we make it ineligible for parallel + # checkout, and thus it is checked out *first*. This way we can ensure + # that "A/B" and "A/C" will both collide with the regular file "a". + # + attr_oid=$(echo "a filter=logger" | git hash-object -w --stdin) && + + cat >objs <<-EOF && + 100644 $empty_oid A/B + 100644 $empty_oid A/C + 100644 $empty_oid a + 100644 $attr_oid .gitattributes + EOF + git rm -rf . && + git update-index --index-info <objs && + + rm -f trace filter.log && + GIT_TRACE2_EVENT="$(pwd)/trace" git \ + -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \ + checkout . && + + # Check that "a" (and only "a") was filtered + echo a >expected.log && + test_cmp filter.log expected.log && + + # Check that it used the right number of workers and detected the collisions + test_workers_in_event_trace 2 trace && + grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/B.}" trace && + grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/C.}" trace +' + +test_expect_success SYMLINKS,CASE_INSENSITIVE_FS 'do not follow symlinks colliding with leading dir' ' + empty_oid=$(git hash-object -w --stdin </dev/null) && + symlink_oid=$(echo "./e" | git hash-object -w --stdin) && + mkdir e && + + cat >objs <<-EOF && + 120000 $symlink_oid D + 100644 $empty_oid d/x + 100644 $empty_oid e/y + EOF + git rm -rf . && + git update-index --index-info <objs && + + set_checkout_config 2 0 && + test_checkout_workers 2 git checkout . 
&& + test_path_is_dir e && + test_path_is_missing e/x +' + +# The two following tests check that parallel checkout correctly reports +# colliding entries on clone. The sequential code detects a collision by +# calling lstat() before trying to open(O_CREAT) a file. (Note that this only +# works for clone.) Then, to find the pair of a colliding item k, it searches +# cache_entry[0, k-1]. This is not sufficient in parallel checkout because: +# +# - A colliding file may be created between the lstat() and open() calls; +# - A colliding entry might appear in the second half of the cache_entry array. +# +test_expect_success CASE_INSENSITIVE_FS 'collision report on clone (w/ racy file creation)' ' + git reset --hard basename_collision && + set_checkout_config 2 0 && + test_checkout_workers 2 git clone . clone-repo 2>stderr && + + grep FILE_X stderr && + grep FILE_x stderr && + grep file_X stderr && + grep file_x stderr && + grep "the following paths have collided" stderr +' + +# This test ensures that the collision report code is correctly looking for +# colliding peers in the second half of the cache_entry array. This is done by +# defining a smudge command for the *last* array entry, which makes it +# non-eligible for parallel-checkout. Thus, it is checked out *first*, before +# spawning the workers. +# +# Note: this test doesn't work on Windows because, on this system, the +# collision report code uses strcmp() to find the colliding pairs when +# core.ignoreCase is false. And we need this setting for this test so that only +# 'file_x' matches the pattern of the filter attribute. But the test works on +# OSX, where the colliding pairs are found using inode. 
+# +test_expect_success CASE_INSENSITIVE_FS,!MINGW,!CYGWIN \ + 'collision report on clone (w/ colliding peer after the detected entry)' ' + + test_config_global filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" && + git reset --hard basename_collision && + echo "file_x filter=logger" >.gitattributes && + git add .gitattributes && + git commit -m "filter for file_x" && + + rm -rf clone-repo && + set_checkout_config 2 0 && + test_checkout_workers 2 \ + git -c core.ignoreCase=false clone . clone-repo 2>stderr && + + grep FILE_X stderr && + grep FILE_x stderr && + grep file_X stderr && + grep file_x stderr && + grep "the following paths have collided" stderr && + + # Check that only "file_x" was filtered + echo file_x >expected.log && + test_cmp clone-repo/filter.log expected.log +' + +test_done diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh new file mode 100755 index 0000000000..2525457961 --- /dev/null +++ b/t/t2082-parallel-checkout-attributes.sh @@ -0,0 +1,194 @@ +#!/bin/sh + +test_description='parallel-checkout: attributes + +Verify that parallel-checkout correctly creates files that require +conversions, as specified in .gitattributes. The main point here is +to check that the conv_attr data is correctly sent to the workers +and that it contains sufficient information to smudge files +properly (without access to the index or attribute stack). +' + +TEST_NO_CREATE_REPO=1 +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-parallel-checkout.sh" +. 
"$TEST_DIRECTORY/lib-encoding.sh" + +test_expect_success 'parallel-checkout with ident' ' + set_checkout_config 2 0 && + git init ident && + ( + cd ident && + echo "A ident" >.gitattributes && + echo "\$Id\$" >A && + echo "\$Id\$" >B && + git add -A && + git commit -m id && + + rm A B && + test_checkout_workers 2 git reset --hard && + hexsz=$(test_oid hexsz) && + grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A && + grep "\\\$Id\\\$" B + ) +' + +test_expect_success 'parallel-checkout with re-encoding' ' + set_checkout_config 2 0 && + git init encoding && + ( + cd encoding && + echo text >utf8-text && + write_utf16 <utf8-text >utf16-text && + + echo "A working-tree-encoding=UTF-16" >.gitattributes && + cp utf16-text A && + cp utf8-text B && + git add A B .gitattributes && + git commit -m encoding && + + # Check that A is stored in UTF-8 + git cat-file -p :A >A.internal && + test_cmp_bin utf8-text A.internal && + + rm A B && + test_checkout_workers 2 git checkout A B && + + # Check that A (and only A) is re-encoded during checkout + test_cmp_bin utf16-text A && + test_cmp_bin utf8-text B + ) +' + +test_expect_success 'parallel-checkout with eol conversions' ' + set_checkout_config 2 0 && + git init eol && + ( + cd eol && + printf "multi\r\nline\r\ntext" >crlf-text && + printf "multi\nline\ntext" >lf-text && + + git config core.autocrlf false && + echo "A eol=crlf" >.gitattributes && + cp crlf-text A && + cp lf-text B && + git add A B .gitattributes && + git commit -m eol && + + # Check that A is stored with LF format + git cat-file -p :A >A.internal && + test_cmp_bin lf-text A.internal && + + rm A B && + test_checkout_workers 2 git checkout A B && + + # Check that A (and only A) is converted to CRLF during checkout + test_cmp_bin crlf-text A && + test_cmp_bin lf-text B + ) +' + +# Entries that require an external filter are not eligible for parallel +# checkout. 
Check that both the parallel-eligible and non-eligible entries are +# properly written in a single checkout operation. +# +test_expect_success 'parallel-checkout and external filter' ' + set_checkout_config 2 0 && + git init filter && + ( + cd filter && + write_script <<-\EOF rot13.sh && + tr \ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM" + EOF + + git config filter.rot13.clean "\"$(pwd)/rot13.sh\"" && + git config filter.rot13.smudge "\"$(pwd)/rot13.sh\"" && + git config filter.rot13.required true && + + echo abcd >original && + echo nopq >rot13 && + + echo "A filter=rot13" >.gitattributes && + cp original A && + cp original B && + cp original C && + git add A B C .gitattributes && + git commit -m filter && + + # Check that A (and only A) was cleaned + git cat-file -p :A >A.internal && + test_cmp rot13 A.internal && + git cat-file -p :B >B.internal && + test_cmp original B.internal && + git cat-file -p :C >C.internal && + test_cmp original C.internal && + + rm A B C *.internal && + test_checkout_workers 2 git checkout A B C && + + # Check that A (and only A) was smudged during checkout + test_cmp original A && + test_cmp original B && + test_cmp original C + ) +' + +# The delayed queue is independent from the parallel queue, and they should be +# able to work together in the same checkout process. 
+# +test_expect_success PERL 'parallel-checkout and delayed checkout' ' + write_script rot13-filter.pl "$PERL_PATH" \ + <"$TEST_DIRECTORY"/t0021/rot13-filter.pl && + + test_config_global filter.delay.process \ + "\"$(pwd)/rot13-filter.pl\" --always-delay \"$(pwd)/delayed.log\" clean smudge delay" && + test_config_global filter.delay.required true && + + echo "abcd" >original && + echo "nopq" >rot13 && + + git init delayed && + ( + cd delayed && + echo "*.d filter=delay" >.gitattributes && + cp ../original W.d && + cp ../original X.d && + cp ../original Y && + cp ../original Z && + git add -A && + git commit -m delayed && + + # Check that *.d files were cleaned + git cat-file -p :W.d >W.d.internal && + test_cmp W.d.internal ../rot13 && + git cat-file -p :X.d >X.d.internal && + test_cmp X.d.internal ../rot13 && + git cat-file -p :Y >Y.internal && + test_cmp Y.internal ../original && + git cat-file -p :Z >Z.internal && + test_cmp Z.internal ../original && + + rm * + ) && + + set_checkout_config 2 0 && + test_checkout_workers 2 git -C delayed checkout -f && + verify_checkout delayed && + + # Check that the *.d files got to the delay queue and were filtered + grep "smudge W.d .* \[DELAYED\]" delayed.log && + grep "smudge X.d .* \[DELAYED\]" delayed.log && + test_cmp delayed/W.d original && + test_cmp delayed/X.d original && + + # Check that the parallel-eligible entries went to the right queue and + # were not filtered + ! grep "smudge Y .* \[DELAYED\]" delayed.log && + ! grep "smudge Z .* \[DELAYED\]" delayed.log && + test_cmp delayed/Y original && + test_cmp delayed/Z original +' + +test_done |