summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sha1-lookup.c47
-rw-r--r--t/lib-pack.sh100
-rwxr-xr-xt/t5308-pack-detect-duplicates.sh80
-rwxr-xr-xt/t5309-pack-delta-cycles.sh77
-rw-r--r--test-sha1.c15
5 files changed, 316 insertions, 3 deletions
diff --git a/sha1-lookup.c b/sha1-lookup.c
index c4dc55d1f5..2dd851598a 100644
--- a/sha1-lookup.c
+++ b/sha1-lookup.c
@@ -204,7 +204,54 @@ int sha1_entry_pos(const void *table,
* byte 0 thru (ofs-1) are the same between
* lo and hi; ofs is the first byte that is
* different.
+ *
+ * If ofs==20, then no bytes are different,
+ * meaning we have entries with duplicate
+ * keys. We know that we are in a solid run
+ * of this entry (because the entries are
+ * sorted, and our lo and hi are the same,
+ * there can be nothing but this single key
+ * in between). So we can stop the search.
+ * Either one of these entries is it (and
+ * we do not care which), or we do not have
+ * it.
+ *
+ * Furthermore, we know that one of our
+ * endpoints must be the edge of the run of
+ * duplicates. For example, given this
+ * sequence:
+ *
+ * idx 0 1 2 3 4 5
+ * key A C C C C D
+ *
+ * If we are searching for "B", we might
+ * hit the duplicate run at lo=1, hi=3
+ * (e.g., by first mi=3, then mi=0). But we
+ * can never have lo > 1, because B < C.
+ * That is, if our key is less than the
+ * run, we know that "lo" is the edge, but
+ * we can say nothing of "hi". Similarly,
+ * if our key is greater than the run, we
+ * know that "hi" is the edge, but we can
+ * say nothing of "lo".
+ *
+ * Therefore if we do not find it, we also
+ * know where it would go if it did exist:
+ * just on the far side of the edge that we
+ * know about.
*/
+ if (ofs == 20) {
+ mi = lo;
+ mi_key = base + elem_size * mi + key_offset;
+ cmp = memcmp(mi_key, key, 20);
+ if (!cmp)
+ return mi;
+ if (cmp < 0)
+ return -1 - hi;
+ else
+ return -1 - lo;
+ }
+
hiv = hi_key[ofs_0];
if (ofs_0 < 19)
hiv = (hiv << 8) | hi_key[ofs_0+1];
diff --git a/t/lib-pack.sh b/t/lib-pack.sh
new file mode 100644
index 0000000000..7e8685b44c
--- /dev/null
+++ b/t/lib-pack.sh
@@ -0,0 +1,100 @@
+#!/bin/sh
+#
+# Support routines for hand-crafting weird or malicious packs.
+#
+# You can make a complete pack like:
+#
+# pack_header 2 >foo.pack &&
+# pack_obj e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 >>foo.pack &&
+# pack_obj e68fe8129b546b101aee9510c5328e7f21ca1d18 >>foo.pack &&
+# pack_trailer foo.pack
+
+# Print the big-endian 4-byte octal representation of $1
+uint32_octal () {
+ n=$1
+ printf '\%o' $(($n / 16777216)); n=$((n % 16777216))
+ printf '\%o' $(($n / 65536)); n=$((n % 65536))
+ printf '\%o' $(($n / 256)); n=$((n % 256))
+ printf '\%o' $(($n ));
+}
+
+# Print the big-endian 4-byte binary representation of $1
+uint32_binary () {
+ printf "$(uint32_octal "$1")"
+}
+
+# Print a pack header, version 2, for a pack with $1 objects
+pack_header () {
+ printf 'PACK' &&
+ printf '\0\0\0\2' &&
+ uint32_binary "$1"
+}
+
+# Print the pack data for object $1, as a delta against object $2 (or as a full
+# object if $2 is missing or empty). The output is suitable for including
+# directly in the packfile, and represents the entirety of the object entry.
+# Doing this on the fly (especially picking your deltas) is quite tricky, so we
+# have hardcoded some well-known objects. See the case statements below for the
+# complete list.
+pack_obj () {
+ case "$1" in
+ # empty blob
+ e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
+ case "$2" in
+ '')
+ printf '\060\170\234\003\0\0\0\0\1'
+ return
+ ;;
+ esac
+ ;;
+
+ # blob containing "\7\76"
+ e68fe8129b546b101aee9510c5328e7f21ca1d18)
+ case "$2" in
+ '')
+ printf '\062\170\234\143\267\3\0\0\116\0\106'
+ return
+ ;;
+ 01d7713666f4de822776c7622c10f1b07de280dc)
+ printf '\165\1\327\161\66\146\364\336\202\47\166' &&
+ printf '\307\142\54\20\361\260\175\342\200\334\170' &&
+ printf '\234\143\142\142\142\267\003\0\0\151\0\114'
+ return
+ ;;
+ esac
+ ;;
+
+ # blob containing "\7\0"
+ 01d7713666f4de822776c7622c10f1b07de280dc)
+ case "$2" in
+ '')
+ printf '\062\170\234\143\147\0\0\0\20\0\10'
+ return
+ ;;
+ e68fe8129b546b101aee9510c5328e7f21ca1d18)
+ printf '\165\346\217\350\22\233\124\153\20\32\356' &&
+ printf '\225\20\305\62\216\177\41\312\35\30\170\234' &&
+ printf '\143\142\142\142\147\0\0\0\53\0\16'
+ return
+ ;;
+ esac
+ ;;
+ esac
+
+ echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
+ return 1
+}
+
+# Compute and append pack trailer to "$1"
+pack_trailer () {
+ test-sha1 -b <"$1" >trailer.tmp &&
+ cat trailer.tmp >>"$1" &&
+ rm -f trailer.tmp
+}
+
+# Remove any existing packs to make sure that
+# whatever we index next will be the pack that we
+# actually use.
+clear_packs () {
+ rm -f .git/objects/pack/*
+}
diff --git a/t/t5308-pack-detect-duplicates.sh b/t/t5308-pack-detect-duplicates.sh
new file mode 100755
index 0000000000..9c5a8766ab
--- /dev/null
+++ b/t/t5308-pack-detect-duplicates.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+test_description='handling of duplicate objects in incoming packfiles'
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-pack.sh
+
+# The sha1s we have in our pack. It's important that these have the same
+# starting byte, so that they end up in the same fanout section of the index.
+# That lets us make sure we are exercising the binary search with both sets.
+LO_SHA1=e68fe8129b546b101aee9510c5328e7f21ca1d18
+HI_SHA1=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+
+# And here's a "missing sha1" which will produce failed lookups. It must also
+# be in the same fanout section, and should be between the two (so that during
+# our binary search, we are sure to end up looking at one or the other of the
+# duplicate runs).
+MISSING_SHA1='e69d000000000000000000000000000000000000'
+
+# git will never intentionally create packfiles with
+# duplicate objects, so we have to construct them by hand.
+#
+# $1 is the name of the packfile to create
+#
+# $2 is the number of times to duplicate each object
+create_pack () {
+ pack_header "$((2 * $2))" >"$1" &&
+ for i in $(test_seq 1 "$2"); do
+ pack_obj $LO_SHA1 &&
+ pack_obj $HI_SHA1
+ done >>"$1" &&
+ pack_trailer "$1"
+}
+
+# double-check that create_pack actually works
+test_expect_success 'pack with no duplicates' '
+ create_pack no-dups.pack 1 &&
+ git index-pack --stdin <no-dups.pack
+'
+
+test_expect_success 'index-pack will allow duplicate objects by default' '
+ clear_packs &&
+ create_pack dups.pack 100 &&
+ git index-pack --stdin <dups.pack
+'
+
+test_expect_success 'create batch-check test vectors' '
+ cat >input <<-EOF &&
+ $LO_SHA1
+ $HI_SHA1
+ $MISSING_SHA1
+ EOF
+ cat >expect <<-EOF
+ $LO_SHA1 blob 2
+ $HI_SHA1 blob 0
+ $MISSING_SHA1 missing
+ EOF
+'
+
+test_expect_success 'lookup in duplicated pack (binary search)' '
+ git cat-file --batch-check <input >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'lookup in duplicated pack (GIT_USE_LOOKUP)' '
+ (
+ GIT_USE_LOOKUP=1 &&
+ export GIT_USE_LOOKUP &&
+ git cat-file --batch-check <input >actual
+ ) &&
+ test_cmp expect actual
+'
+
+test_expect_success 'index-pack can reject packs with duplicates' '
+ clear_packs &&
+ create_pack dups.pack 2 &&
+ test_must_fail git index-pack --strict --stdin <dups.pack &&
+ test_expect_code 1 git cat-file -e $LO_SHA1
+'
+
+test_done
diff --git a/t/t5309-pack-delta-cycles.sh b/t/t5309-pack-delta-cycles.sh
new file mode 100755
index 0000000000..3e7861b075
--- /dev/null
+++ b/t/t5309-pack-delta-cycles.sh
@@ -0,0 +1,77 @@
+#!/bin/sh
+
+test_description='test index-pack handling of delta cycles in packfiles'
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-pack.sh
+
+# Two similar-ish objects that we have computed deltas between.
+A=01d7713666f4de822776c7622c10f1b07de280dc
+B=e68fe8129b546b101aee9510c5328e7f21ca1d18
+
+# double-check our hand-constucted packs
+test_expect_success 'index-pack works with a single delta (A->B)' '
+ clear_packs &&
+ {
+ pack_header 2 &&
+ pack_obj $A $B &&
+ pack_obj $B
+ } >ab.pack &&
+ pack_trailer ab.pack &&
+ git index-pack --stdin <ab.pack &&
+ git cat-file -t $A &&
+ git cat-file -t $B
+'
+
+test_expect_success 'index-pack works with a single delta (B->A)' '
+ clear_packs &&
+ {
+ pack_header 2 &&
+ pack_obj $A &&
+ pack_obj $B $A
+ } >ba.pack &&
+ pack_trailer ba.pack &&
+ git index-pack --stdin <ba.pack &&
+ git cat-file -t $A &&
+ git cat-file -t $B
+'
+
+test_expect_success 'index-pack detects missing base objects' '
+ clear_packs &&
+ {
+ pack_header 1 &&
+ pack_obj $A $B
+ } >missing.pack &&
+ pack_trailer missing.pack &&
+ test_must_fail git index-pack --fix-thin --stdin <missing.pack
+'
+
+test_expect_success 'index-pack detects REF_DELTA cycles' '
+ clear_packs &&
+ {
+ pack_header 2 &&
+ pack_obj $A $B &&
+ pack_obj $B $A
+ } >cycle.pack &&
+ pack_trailer cycle.pack &&
+ test_must_fail git index-pack --fix-thin --stdin <cycle.pack
+'
+
+test_expect_failure 'failover to an object in another pack' '
+ clear_packs &&
+ git index-pack --stdin <ab.pack &&
+ git index-pack --stdin --fix-thin <cycle.pack
+'
+
+test_expect_failure 'failover to a duplicate object in the same pack' '
+ clear_packs &&
+ {
+ pack_header 3 &&
+ pack_obj $A $B &&
+ pack_obj $B $A &&
+ pack_obj $A
+ } >recoverable.pack &&
+ pack_trailer recoverable.pack &&
+ git index-pack --fix-thin --stdin <recoverable.pack
+'
+
+test_done
diff --git a/test-sha1.c b/test-sha1.c
index 80daba980e..e57eae10bf 100644
--- a/test-sha1.c
+++ b/test-sha1.c
@@ -5,10 +5,15 @@ int main(int ac, char **av)
git_SHA_CTX ctx;
unsigned char sha1[20];
unsigned bufsz = 8192;
+ int binary = 0;
char *buffer;
- if (ac == 2)
- bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
+ if (ac == 2) {
+ if (!strcmp(av[1], "-b"))
+ binary = 1;
+ else
+ bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
+ }
if (!bufsz)
bufsz = 8192;
@@ -42,6 +47,10 @@ int main(int ac, char **av)
git_SHA1_Update(&ctx, buffer, this_sz);
}
git_SHA1_Final(sha1, &ctx);
- puts(sha1_to_hex(sha1));
+
+ if (binary)
+ fwrite(sha1, 1, 20, stdout);
+ else
+ puts(sha1_to_hex(sha1));
exit(0);
}