diff options
author | Junio C Hamano <gitster@pobox.com> | 2018-10-26 14:22:10 +0900 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2018-10-26 14:22:10 +0900 |
commit | 7a43ab6fb25924ca018073ef7ef46e16b3156bd0 (patch) | |
tree | 08b311fbea07553319c3cd651908dce9aee98505 | |
parent | Merge branch 'ds/coverage-diff' (diff) | |
parent | split-index: BUG() when cache entry refers to non-existing shared entry (diff) | |
download | tgif-7a43ab6fb25924ca018073ef7ef46e16b3156bd0.tar.xz |
Merge branch 'sg/split-index-racefix'
The codepath to support the experimental split-index mode had
remaining "racily clean" issues fixed.
* sg/split-index-racefix:
split-index: BUG() when cache entry refers to non-existing shared entry
split-index: smudge and add racily clean cache entries to split index
split-index: don't compare cached data of entries already marked for split index
split-index: count the number of deleted entries
t1700-split-index: date back files to avoid racy situations
split-index: add tests to demonstrate the racy split index problem
t1700-split-index: document why FSMONITOR is disabled in this test script
-rw-r--r-- | cache.h | 2 | ||||
-rw-r--r-- | read-cache.c | 2 | ||||
-rw-r--r-- | split-index.c | 135 | ||||
-rwxr-xr-x | t/t1700-split-index.sh | 49 | ||||
-rwxr-xr-x | t/t1701-racy-split-index.sh | 214 |
5 files changed, 360 insertions, 42 deletions
@@ -783,6 +783,8 @@ extern void *read_blob_data_from_index(const struct index_state *, const char *, #define CE_MATCH_REFRESH 0x10 /* don't refresh_fsmonitor state or do stat comparison even if CE_FSMONITOR_VALID is true */ #define CE_MATCH_IGNORE_FSMONITOR 0X20 +extern int is_racy_timestamp(const struct index_state *istate, + const struct cache_entry *ce); extern int ie_match_stat(struct index_state *, const struct cache_entry *, struct stat *, unsigned int); extern int ie_modified(struct index_state *, const struct cache_entry *, struct stat *, unsigned int); diff --git a/read-cache.c b/read-cache.c index 1df5c16dbc..d57958233e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -345,7 +345,7 @@ static int is_racy_stat(const struct index_state *istate, ); } -static int is_racy_timestamp(const struct index_state *istate, +int is_racy_timestamp(const struct index_state *istate, const struct cache_entry *ce) { return (!S_ISGITLINK(ce->ce_mode) && diff --git a/split-index.c b/split-index.c index 84f067e10d..5820412dc5 100644 --- a/split-index.c +++ b/split-index.c @@ -111,7 +111,7 @@ static void mark_entry_for_delete(size_t pos, void *data) die("position for delete %d exceeds base index size %d", (int)pos, istate->cache_nr); istate->cache[pos]->ce_flags |= CE_REMOVE; - istate->split_index->nr_deletions = 1; + istate->split_index->nr_deletions++; } static void replace_entry(size_t pos, void *data) @@ -188,6 +188,30 @@ void merge_base_index(struct index_state *istate) si->saved_cache_nr = 0; } +/* + * Compare most of the fields in two cache entries, i.e. all except the + * hashmap_entry and the name. + */ +static int compare_ce_content(struct cache_entry *a, struct cache_entry *b) +{ + const unsigned int ondisk_flags = CE_STAGEMASK | CE_VALID | + CE_EXTENDED_FLAGS; + unsigned int ce_flags = a->ce_flags; + unsigned int base_flags = b->ce_flags; + int ret; + + /* only on-disk flags matter */ + a->ce_flags &= ondisk_flags; + b->ce_flags &= ondisk_flags; + ret = memcmp(&a->ce_stat_data, &b->ce_stat_data, + offsetof(struct cache_entry, name) - + offsetof(struct cache_entry, ce_stat_data)); + a->ce_flags = ce_flags; + b->ce_flags = base_flags; + + return ret; +} + void prepare_to_write_split_index(struct index_state *istate) { struct split_index *si = init_split_index(istate); @@ -207,38 +231,109 @@ void prepare_to_write_split_index(struct index_state *istate) */ for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *base; - /* namelen is checked separately */ - const unsigned int ondisk_flags = - CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS; - unsigned int ce_flags, base_flags, ret; ce = istate->cache[i]; - if (!ce->index) + if (!ce->index) { + /* + * During simple update index operations this + * is a cache entry that is not present in + * the shared index. It will be added to the + * split index. + * + * However, it might also represent a file + * that already has a cache entry in the + * shared index, but a new index has just + * been constructed by unpack_trees(), and + * this entry now refers to different content + * than what was recorded in the original + * index, e.g. during 'read-tree -m HEAD^' or + * 'checkout HEAD^'. In this case the + * original entry in the shared index will be + * marked as deleted, and this entry will be + * added to the split index. + */ continue; + } if (ce->index > si->base->cache_nr) { - ce->index = 0; - continue; + BUG("ce refers to a shared ce at %d, which is beyond the shared index size %d", + ce->index, si->base->cache_nr); } ce->ce_flags |= CE_MATCHED; /* or "shared" */ base = si->base->cache[ce->index - 1]; - if (ce == base) + if (ce == base) { + /* The entry is present in the shared index. */ + if (ce->ce_flags & CE_UPDATE_IN_BASE) { + /* + * Already marked for inclusion in + * the split index, either because + * the corresponding file was + * modified and the cached stat data + * was refreshed, or because there + * is already a replacement entry in + * the split index. + * Nothing more to do here. + */ + } else if (!ce_uptodate(ce) && + is_racy_timestamp(istate, ce)) { + /* + * A racily clean cache entry stored + * only in the shared index: it must + * be added to the split index, so + * the subsequent do_write_index() + * can smudge its stat data. + */ + ce->ce_flags |= CE_UPDATE_IN_BASE; + } else { + /* + * The entry is only present in the + * shared index and it was not + * refreshed. + * Just leave it there. + */ + } continue; + } if (ce->ce_namelen != base->ce_namelen || strcmp(ce->name, base->name)) { ce->index = 0; continue; } - ce_flags = ce->ce_flags; - base_flags = base->ce_flags; - /* only on-disk flags matter */ - ce->ce_flags &= ondisk_flags; - base->ce_flags &= ondisk_flags; - ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data, - offsetof(struct cache_entry, name) - - offsetof(struct cache_entry, ce_stat_data)); - ce->ce_flags = ce_flags; - base->ce_flags = base_flags; - if (ret) + /* + * This is the copy of a cache entry that is present + * in the shared index, created by unpack_trees() + * while it constructed a new index. + */ + if (ce->ce_flags & CE_UPDATE_IN_BASE) { + /* + * Already marked for inclusion in the split + * index, either because the corresponding + * file was modified and the cached stat data + * was refreshed, or because the original + * entry already had a replacement entry in + * the split index. + * Nothing to do. + */ + } else if (!ce_uptodate(ce) && + is_racy_timestamp(istate, ce)) { + /* + * A copy of a racily clean cache entry from + * the shared index. It must be added to + * the split index, so the subsequent + * do_write_index() can smudge its stat data. + */ ce->ce_flags |= CE_UPDATE_IN_BASE; + } else { + /* + * Thoroughly compare the cached data to see + * whether it should be marked for inclusion + * in the split index. + * + * This comparison might be unnecessary, as + * code paths modifying the cached data do + * set CE_UPDATE_IN_BASE as well. + */ + if (compare_ce_content(ce, base)) + ce->ce_flags |= CE_UPDATE_IN_BASE; + } discard_cache_entry(base); si->base->cache[ce->index - 1] = ce; } diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index 3e618cad12..2ac47aa0e4 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -13,6 +13,13 @@ sane_unset GIT_TEST_SPLIT_INDEX sane_unset GIT_TEST_FSMONITOR sane_unset GIT_TEST_INDEX_THREADS +# Create a file named as $1 with content read from stdin. +# Set the file's mtime to a few seconds in the past to avoid racy situations. +create_non_racy_file () { + cat >"$1" && + test-tool chmtime =-5 "$1" +} + test_expect_success 'enable split index' ' git config splitIndex.maxPercentChange 100 && git update-index --split-index && @@ -36,7 +43,7 @@ test_expect_success 'enable split index' ' ' test_expect_success 'add one file' ' - : >one && + create_non_racy_file one && git update-index --add one && git ls-files --stage >ls-files.actual && cat >ls-files.expect <<-EOF && @@ -88,7 +95,7 @@ test_expect_success 'enable split index again, "one" now belongs to base index"' ' test_expect_success 'modify original file, base index untouched' ' - echo modified >one && + echo modified | create_non_racy_file one && git update-index one && git ls-files --stage >ls-files.actual && cat >ls-files.expect <<-EOF && @@ -107,7 +114,7 @@ test_expect_success 'modify original file, base index untouched' ' ' test_expect_success 'add another file, which stays index' ' - : >two && + create_non_racy_file two && git update-index --add two && git ls-files --stage >ls-files.actual && cat >ls-files.expect <<-EOF && @@ -160,7 +167,7 @@ test_expect_success 'remove file in base index' ' ' test_expect_success 'add original file back' ' - : >one && + create_non_racy_file one && git update-index --add one && git ls-files --stage >ls-files.actual && cat >ls-files.expect <<-EOF && @@ -179,7 +186,7 @@ test_expect_success 'add original file back' ' ' test_expect_success 'add new file' ' - : >two && + create_non_racy_file two && git update-index --add two && git ls-files --stage >actual && cat >expect <<-EOF && @@ -223,7 +230,7 @@ test_expect_success 'rev-parse --shared-index-path' ' test_expect_success 'set core.splitIndex config variable to true' ' git config core.splitIndex true && - : >three && + create_non_racy_file three && git update-index --add three && git ls-files --stage >ls-files.actual && cat >ls-files.expect <<-EOF && @@ -258,9 +265,9 @@ test_expect_success 'set core.splitIndex config variable to false' ' test_cmp expect actual ' -test_expect_success 'set core.splitIndex config variable to true' ' +test_expect_success 'set core.splitIndex config variable back to true' ' git config core.splitIndex true && - : >three && + create_non_racy_file three && git update-index --add three && BASE=$(test-tool dump-split-index .git/index | grep "^base") && test-tool dump-split-index .git/index | sed "/^own/d" >actual && @@ -270,7 +277,7 @@ test_expect_success 'set core.splitIndex config variable to true' ' deletions: EOF test_cmp expect actual && - : >four && + create_non_racy_file four && git update-index --add four && test-tool dump-split-index .git/index | sed "/^own/d" >actual && cat >expect <<-EOF && @@ -284,7 +291,7 @@ test_expect_success 'set core.splitIndex config variable to true' ' test_expect_success 'check behavior with splitIndex.maxPercentChange unset' ' git config --unset splitIndex.maxPercentChange && - : >five && + create_non_racy_file five && git update-index --add five && BASE=$(test-tool dump-split-index .git/index | grep "^base") && test-tool dump-split-index .git/index | sed "/^own/d" >actual && @@ -294,7 +301,7 @@ test_expect_success 'check behavior with splitIndex.maxPercentChange unset' ' deletions: EOF test_cmp expect actual && - : >six && + create_non_racy_file six && git update-index --add six && test-tool dump-split-index .git/index | sed "/^own/d" >actual && cat >expect <<-EOF && @@ -308,7 +315,7 @@ test_expect_success 'check behavior with splitIndex.maxPercentChange unset' ' test_expect_success 'check splitIndex.maxPercentChange set to 0' ' git config splitIndex.maxPercentChange 0 && - : >seven && + create_non_racy_file seven && git update-index --add seven && BASE=$(test-tool dump-split-index .git/index | grep "^base") && test-tool dump-split-index .git/index | sed "/^own/d" >actual && @@ -318,7 +325,7 @@ test_expect_success 'check splitIndex.maxPercentChange set to 0' ' deletions: EOF test_cmp expect actual && - : >eight && + create_non_racy_file eight && git update-index --add eight && BASE=$(test-tool dump-split-index .git/index | grep "^base") && test-tool dump-split-index .git/index | sed "/^own/d" >actual && @@ -331,17 +338,17 @@ test_expect_success 'check splitIndex.maxPercentChange set to 0' ' ' test_expect_success 'shared index files expire after 2 weeks by default' ' - : >ten && + create_non_racy_file ten && git update-index --add ten && test $(ls .git/sharedindex.* | wc -l) -gt 2 && just_under_2_weeks_ago=$((5-14*86400)) && test-tool chmtime =$just_under_2_weeks_ago .git/sharedindex.* && - : >eleven && + create_non_racy_file eleven && git update-index --add eleven && test $(ls .git/sharedindex.* | wc -l) -gt 2 && just_over_2_weeks_ago=$((-1-14*86400)) && test-tool chmtime =$just_over_2_weeks_ago .git/sharedindex.* && - : >twelve && + create_non_racy_file twelve && git update-index --add twelve && test $(ls .git/sharedindex.* | wc -l) -le 2 ' @@ -349,12 +356,12 @@ test_expect_success 'shared index files expire after 2 weeks by default' ' test_expect_success 'check splitIndex.sharedIndexExpire set to 16 days' ' git config splitIndex.sharedIndexExpire "16.days.ago" && test-tool chmtime =$just_over_2_weeks_ago .git/sharedindex.* && - : >thirteen && + create_non_racy_file thirteen && git update-index --add thirteen && test $(ls .git/sharedindex.* | wc -l) -gt 2 && just_over_16_days_ago=$((-1-16*86400)) && test-tool chmtime =$just_over_16_days_ago .git/sharedindex.* && - : >fourteen && + create_non_racy_file fourteen && git update-index --add fourteen && test $(ls .git/sharedindex.* | wc -l) -le 2 ' @@ -363,13 +370,13 @@ test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and "now" git config splitIndex.sharedIndexExpire never && just_10_years_ago=$((-365*10*86400)) && test-tool chmtime =$just_10_years_ago .git/sharedindex.* && - : >fifteen && + create_non_racy_file fifteen && git update-index --add fifteen && test $(ls .git/sharedindex.* | wc -l) -gt 2 && git config splitIndex.sharedIndexExpire now && just_1_second_ago=-1 && test-tool chmtime =$just_1_second_ago .git/sharedindex.* && - : >sixteen && + create_non_racy_file sixteen && git update-index --add sixteen && test $(ls .git/sharedindex.* | wc -l) -le 2 ' @@ -384,7 +391,7 @@ do # Create one new shared index file git config core.sharedrepository "$mode" && git config core.splitIndex true && - : >one && + create_non_racy_file one && git update-index --add one && echo "$modebits" >expect && test_modebits .git/index >actual && diff --git a/t/t1701-racy-split-index.sh b/t/t1701-racy-split-index.sh new file mode 100755 index 0000000000..5dc221ef38 --- /dev/null +++ b/t/t1701-racy-split-index.sh @@ -0,0 +1,214 @@ +#!/bin/sh + +# This test can give false success if your machine is sufficiently +# slow or all trials happened to happen on second boundaries. + +test_description='racy split index' + +. ./test-lib.sh + +test_expect_success 'setup' ' + # Only split the index when the test explicitly says so. + sane_unset GIT_TEST_SPLIT_INDEX && + git config splitIndex.maxPercentChange 100 && + + echo "cached content" >racy-file && + git add racy-file && + git commit -m initial && + + echo something >other-file && + # No raciness with this file. + test-tool chmtime =-20 other-file && + + echo "+cached content" >expect +' + +check_cached_diff () { + git diff-index --patch --cached $EMPTY_TREE racy-file >diff && + tail -1 diff >actual && + test_cmp expect actual +} + +trials="0 1 2 3 4" +for trial in $trials +do + test_expect_success "split the index while adding a racily clean file #$trial" ' + rm -f .git/index .git/sharedindex.* && + + # The next three commands must be run within the same + # second (so both writes to racy-file result in the same + # mtime) to create the interesting racy situation. + echo "cached content" >racy-file && + + # Update and split the index. The cache entry of + # racy-file will be stored only in the shared index. + git update-index --split-index --add racy-file && + + # File size must stay the same. + echo "dirty worktree" >racy-file && + + # Subsequent git commands should notice that racy-file + # and the split index have the same mtime, and check + # the content of the file to see if it is actually + # clean. + check_cached_diff + ' +done + +for trial in $trials +do + test_expect_success "add a racily clean file to an already split index #$trial" ' + rm -f .git/index .git/sharedindex.* && + + git update-index --split-index && + + # The next three commands must be run within the same + # second. + echo "cached content" >racy-file && + + # Update the split index. The cache entry of racy-file + # will be stored only in the split index. + git update-index --add racy-file && + + # File size must stay the same. + echo "dirty worktree" >racy-file && + + # Subsequent git commands should notice that racy-file + # and the split index have the same mtime, and check + # the content of the file to see if it is actually + # clean. + check_cached_diff + ' +done + +for trial in $trials +do + test_expect_success "split the index when the index contains a racily clean cache entry #$trial" ' + rm -f .git/index .git/sharedindex.* && + + # The next three commands must be run within the same + # second. + echo "cached content" >racy-file && + + git update-index --add racy-file && + + # File size must stay the same. + echo "dirty worktree" >racy-file && + + # Now wait a bit to ensure that the split index written + # below will get a more recent mtime than racy-file. + sleep 1 && + + # Update and split the index when the index contains + # the racily clean cache entry of racy-file. + # A corresponding replacement cache entry with smudged + # stat data should be added to the new split index. + git update-index --split-index --add other-file && + + # Subsequent git commands should notice the smudged + # stat data in the replacement cache entry and that it + # doesnt match with the file the worktree. + check_cached_diff + ' +done + +for trial in $trials +do + test_expect_success "update the split index when it contains a new racily clean cache entry #$trial" ' + rm -f .git/index .git/sharedindex.* && + + git update-index --split-index && + + # The next three commands must be run within the same + # second. + echo "cached content" >racy-file && + + # Update the split index. The cache entry of racy-file + # will be stored only in the split index. + git update-index --add racy-file && + + # File size must stay the same. + echo "dirty worktree" >racy-file && + + # Now wait a bit to ensure that the split index written + # below will get a more recent mtime than racy-file. + sleep 1 && + + # Update the split index when the racily clean cache + # entry of racy-file is only stored in the split index. + # An updated cache entry with smudged stat data should + # be added to the new split index. + git update-index --add other-file && + + # Subsequent git commands should notice the smudged + # stat data. + check_cached_diff + ' +done + +for trial in $trials +do + test_expect_success "update the split index when a racily clean cache entry is stored only in the shared index #$trial" ' + rm -f .git/index .git/sharedindex.* && + + # The next three commands must be run within the same + # second. + echo "cached content" >racy-file && + + # Update and split the index. The cache entry of + # racy-file will be stored only in the shared index. + git update-index --split-index --add racy-file && + + # File size must stay the same. + echo "dirty worktree" >racy-file && + + # Now wait a bit to ensure that the split index written + # below will get a more recent mtime than racy-file. + sleep 1 && + + # Update the split index when the racily clean cache + # entry of racy-file is only stored in the shared index. + # A corresponding replacement cache entry with smudged + # stat data should be added to the new split index. + git update-index --add other-file && + + # Subsequent git commands should notice the smudged + # stat data. + check_cached_diff + ' +done + +for trial in $trials +do + test_expect_success "update the split index after unpack trees() copied a racily clean cache entry from the shared index #$trial" ' + rm -f .git/index .git/sharedindex.* && + + # The next three commands must be run within the same + # second. + echo "cached content" >racy-file && + + # Update and split the index. The cache entry of + # racy-file will be stored only in the shared index. + git update-index --split-index --add racy-file && + + # File size must stay the same. + echo "dirty worktree" >racy-file && + + # Now wait a bit to ensure that the split index written + # below will get a more recent mtime than racy-file. + sleep 1 && + + # Update the split index after unpack_trees() copied the + # racily clean cache entry of racy-file from the shared + # index. A corresponding replacement cache entry + # with smudged stat data should be added to the new + # split index. + git read-tree -m HEAD && + + # Subsequent git commands should notice the smudged + # stat data. + check_cached_diff + ' +done + +test_done |