From 336226c259f3fa2e2c9c1f5ce857f0c163b26928 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:03:41 -0400 Subject: packfile.h: drop extern from function declarations As CodingGuidelines recommends, we do not need an "extern" when declaring a public function. Let's drop these. Note that we leave the extern on report_garbage(), as that is actually a function pointer, not a function itself. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- packfile.h | 82 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/packfile.h b/packfile.h index d70c6d9afb..ea7a690fc6 100644 --- a/packfile.h +++ b/packfile.h @@ -15,23 +15,23 @@ struct object_info; * * Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx" */ -extern char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext); +char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext); /* * Return the name of the (local) packfile with the specified sha1 in * its name. The return value is a pointer to memory that is * overwritten each time this function is called. */ -extern char *sha1_pack_name(const unsigned char *sha1); +char *sha1_pack_name(const unsigned char *sha1); /* * Return the name of the (local) pack index file with the specified * sha1 in its name. The return value is a pointer to memory that is * overwritten each time this function is called. */ -extern char *sha1_pack_index_name(const unsigned char *sha1); +char *sha1_pack_index_name(const unsigned char *sha1); -extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); +struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len, const char *file_pach, void *data); @@ -45,8 +45,8 @@ void for_each_file_in_pack_dir(const char *objdir, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -extern void reprepare_packed_git(struct repository *r); -extern void install_packed_git(struct repository *r, struct packed_git *pack); +void reprepare_packed_git(struct repository *r); +void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); @@ -59,32 +59,32 @@ struct packed_git *get_all_packs(struct repository *r); */ unsigned long approximate_object_count(void); -extern struct packed_git *find_sha1_pack(const unsigned char *sha1, - struct packed_git *packs); +struct packed_git *find_sha1_pack(const unsigned char *sha1, + struct packed_git *packs); -extern void pack_report(void); +void pack_report(void); /* * mmap the index file for the specified packfile (if it is not * already mmapped). Return 0 on success. */ -extern int open_pack_index(struct packed_git *); +int open_pack_index(struct packed_git *); /* * munmap the index file for the specified packfile (if it is * currently mmapped). */ -extern void close_pack_index(struct packed_git *); +void close_pack_index(struct packed_git *); -extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); +uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); -extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); -extern void close_pack_windows(struct packed_git *); -extern void close_pack(struct packed_git *); -extern void close_all_packs(struct raw_object_store *o); -extern void unuse_pack(struct pack_window **); -extern void clear_delta_base_cache(void); -extern struct packed_git *add_packed_git(const char *path, size_t path_len, int local); +unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); +void close_pack_windows(struct packed_git *); +void close_pack(struct packed_git *); +void close_all_packs(struct raw_object_store *o); +void unuse_pack(struct pack_window **); +void clear_delta_base_cache(void); +struct packed_git *add_packed_git(const char *path, size_t path_len, int local); /* * Make sure that a pointer access into an mmap'd index file is within bounds, @@ -94,7 +94,7 @@ extern struct packed_git *add_packed_git(const char *path, size_t path_len, int * (like the 64-bit extended offset table), as we compare the size to the * fixed-length parts when we open the file. */ -extern void check_pack_index_ptr(const struct packed_git *p, const void *ptr); +void check_pack_index_ptr(const struct packed_git *p, const void *ptr); /* * Perform binary search on a pack-index for a given oid. Packfile is expected to @@ -110,59 +110,59 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32 * at the SHA-1 within the mmapped index. Return NULL if there is an * error. */ -extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t n); +const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t n); /* * Like nth_packed_object_sha1, but write the data into the object specified by * the the first argument. Returns the first argument on success, and NULL on * error. */ -extern const struct object_id *nth_packed_object_oid(struct object_id *, struct packed_git *, uint32_t n); +const struct object_id *nth_packed_object_oid(struct object_id *, struct packed_git *, uint32_t n); /* * Return the offset of the nth object within the specified packfile. * The index must already be opened. */ -extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t n); +off_t nth_packed_object_offset(const struct packed_git *, uint32_t n); /* * If the object named sha1 is present in the specified packfile, * return its offset within the packfile; otherwise, return 0. */ -extern off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *); +off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *); -extern int is_pack_valid(struct packed_git *); -extern void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *); -extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); -extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); -extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); +int is_pack_valid(struct packed_git *); +void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *); +unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); +unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); +int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); -extern void release_pack_memory(size_t); +void release_pack_memory(size_t); /* global flag to enable extra checks when accessing packed objects */ extern int do_check_packed_object_crc; -extern int packed_object_info(struct repository *r, - struct packed_git *pack, - off_t offset, struct object_info *); +int packed_object_info(struct repository *r, + struct packed_git *pack, + off_t offset, struct object_info *); -extern void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1); -extern const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); +void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1); +const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); /* * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. */ -extern int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); +int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); -extern int has_object_pack(const struct object_id *oid); +int has_object_pack(const struct object_id *oid); -extern int has_pack_index(const unsigned char *sha1); +int has_pack_index(const unsigned char *sha1); /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. */ -extern int is_promisor_object(const struct object_id *oid); +int is_promisor_object(const struct object_id *oid); /* * Expose a function for fuzz testing. @@ -174,7 +174,7 @@ extern int is_promisor_object(const struct object_id *oid); * have a convenient entry-point for fuzz testing. For real uses, you should * probably use open_pack_index() or parse_pack_index() instead. */ -extern int load_idx(const char *path, const unsigned int hashsz, void *idx_map, - size_t idx_size, struct packed_git *p); +int load_idx(const char *path, const unsigned int hashsz, void *idx_map, + size_t idx_size, struct packed_git *p); #endif -- cgit v1.2.3 From 4828ce9871fee0ea0309220c461fdedf255df931 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:04:24 -0400 Subject: pack-revindex: open index if necessary We can't create a pack revindex if we haven't actually looked at the index. Normally we would never get as far as creating a revindex without having already been looking in the pack, so this code never bothered to double-check that pack->index_data had been loaded. But with the new multi-pack-index feature, many code paths might not load the individual pack .idx at all (they'd find objects via the midx and then open the .pack, but not its index). This can't yet be triggered in practice, because a bug in the midx code means we accidentally open up the individual .idx files anyway. But in preparation for fixing that, let's have the revindex code check that everything it needs has been loaded. In most cases this will just be a quick noop. But note that this does introduce a possibility of error (if we have to open the index and it's corrupt), so load_pack_revindex() now returns a result code, and callers need to handle the error. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap.c | 3 ++- pack-revindex.c | 13 ++++++++++--- pack-revindex.h | 2 +- packfile.c | 6 ++++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 4695aaf6b4..3960ad94c8 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -308,7 +308,8 @@ static int load_pack_bitmap(struct bitmap_index *bitmap_git) bitmap_git->bitmaps = kh_init_sha1(); bitmap_git->ext_index.positions = kh_init_sha1_pos(); - load_pack_revindex(bitmap_git->pack); + if (load_pack_revindex(bitmap_git->pack)) + goto failed; if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) || !(bitmap_git->trees = read_bitmap_1(bitmap_git)) || diff --git a/pack-revindex.c b/pack-revindex.c index 50891f77a2..d28a7e43d0 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -1,6 +1,7 @@ #include "cache.h" #include "pack-revindex.h" #include "object-store.h" +#include "packfile.h" /* * Pack index for existing packs give us easy access to the offsets into @@ -158,10 +159,14 @@ static void create_pack_revindex(struct packed_git *p) sort_revindex(p->revindex, num_ent, p->pack_size); } -void load_pack_revindex(struct packed_git *p) +int load_pack_revindex(struct packed_git *p) { - if (!p->revindex) + if (!p->revindex) { + if (open_pack_index(p)) + return -1; create_pack_revindex(p); + } + return 0; } int find_revindex_position(struct packed_git *p, off_t ofs) @@ -188,7 +193,9 @@ struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs) { int pos; - load_pack_revindex(p); + if (load_pack_revindex(p)) + return NULL; + pos = find_revindex_position(p, ofs); if (pos < 0) diff --git a/pack-revindex.h b/pack-revindex.h index e262f3efe8..848331d5d6 100644 --- a/pack-revindex.h +++ b/pack-revindex.h @@ -8,7 +8,7 @@ struct revindex_entry { unsigned int nr; }; -void load_pack_revindex(struct packed_git *p); +int load_pack_revindex(struct packed_git *p); int find_revindex_position(struct packed_git *p, off_t ofs); struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs); diff --git a/packfile.c b/packfile.c index 16bcb75262..6e40bd89c7 100644 --- a/packfile.c +++ b/packfile.c @@ -2023,8 +2023,10 @@ int for_each_object_in_pack(struct packed_git *p, uint32_t i; int r = 0; - if (flags & FOR_EACH_OBJECT_PACK_ORDER) - load_pack_revindex(p); + if (flags & FOR_EACH_OBJECT_PACK_ORDER) { + if (load_pack_revindex(p)) + return -1; + } for (i = 0; i < p->num_objects; i++) { uint32_t pos; -- cgit v1.2.3 From b4a14394af96b6a87a722ffb57666bb1edc001b9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:04:56 -0400 Subject: t5319: fix bogus cat-file argument There's no such argument as "--unsorted"; it's spelled "--unordered". But our test failed to notice that cat-file didn't run at all because: 1. It lost the exit code of git on the left-hand side of a pipe. 2. It was comparing two runs of the broken invocation with and without a particular config variable (and indeed, both cases produced no output!). Let's fix the option, but also tweak the helper function to check the exit code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5319-multi-pack-index.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 70926b5bc0..42f4d6cd01 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -86,13 +86,14 @@ test_expect_success 'write midx with one v1 pack' ' ' midx_git_two_modes () { + git -c core.multiPackIndex=false $1 >expect && + git -c core.multiPackIndex=true $1 >actual && if [ "$2" = "sorted" ] then - git -c core.multiPackIndex=false $1 | sort >expect && - git -c core.multiPackIndex=true $1 | sort >actual - else - git -c core.multiPackIndex=false $1 >expect && - git -c core.multiPackIndex=true $1 >actual + sort expect.sorted && + mv expect.sorted expect && + sort actual.sorted && + mv actual.sorted actual fi && test_cmp expect actual } @@ -104,7 +105,7 @@ compare_results_with_midx () { midx_git_two_modes "log --raw" && midx_git_two_modes "count-objects --verbose" && midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check" && - midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check --unsorted" sorted + midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check --unordered" sorted ' } -- cgit v1.2.3 From 5670ad98a8c0b5d7cabb5931225c759ec977600f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:05:03 -0400 Subject: t5319: drop useless --buffer from cat-file The cat-file --buffer option is the default already when using --batch-all-objects. It doesn't hurt to specify it, but it's nice for the test scripts to model good usage. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5319-multi-pack-index.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 42f4d6cd01..8c4d2bd849 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -104,8 +104,8 @@ compare_results_with_midx () { midx_git_two_modes "rev-list --objects --all" && midx_git_two_modes "log --raw" && midx_git_two_modes "count-objects --verbose" && - midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check" && - midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check --unordered" sorted + midx_git_two_modes "cat-file --batch-all-objects --batch-check" && + midx_git_two_modes "cat-file --batch-all-objects --batch-check --unordered" sorted ' } -- cgit v1.2.3 From 013fd7ada3c81cec8f0c48427c77394028707c2e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:06:04 -0400 Subject: midx: check both pack and index names for containment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A midx file (and the struct we parse from it) contains a list of all of the covered packfiles, mentioned by their ".idx" names (e.g., "pack-1234.idx", etc). And thus calls to midx_contains_pack() expect callers to provide the idx name. This works for most of the calls, but the one in open_packed_git_1() tries to feed a packed_git->pack_name, which is the ".pack" name, meaning we'll never find a match (even if the pack is covered by the midx). We can fix this by converting the ".pack" to ".idx" in the caller. However, that requires allocating a new string. Instead, let's make midx_contains_pack() a bit friendlier, and allow it take _either_ the .pack or .idx variant. All cleverness in the matching code is credited to René. Bugs are mine. There's no test here, because while this does fix _a_ bug, it's masked by another bug in that same caller. That will be covered (with a test) in the next patch. Helped-by: René Scharfe Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 36 ++++++++++++++++++++++++++++++++++-- midx.h | 2 +- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index 8a505fd423..0ceca1938f 100644 --- a/midx.c +++ b/midx.c @@ -307,7 +307,39 @@ int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct mu return nth_midxed_pack_entry(m, e, pos); } -int midx_contains_pack(struct multi_pack_index *m, const char *idx_name) +/* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */ +static int cmp_idx_or_pack_name(const char *idx_or_pack_name, + const char *idx_name) +{ + /* Skip past any initial matching prefix. */ + while (*idx_name && *idx_name == *idx_or_pack_name) { + idx_name++; + idx_or_pack_name++; + } + + /* + * If we didn't match completely, we may have matched "pack-1234." and + * be left with "idx" and "pack" respectively, which is also OK. We do + * not have to check for "idx" and "idx", because that would have been + * a complete match (and in that case these strcmps will be false, but + * we'll correctly return 0 from the final strcmp() below. + * + * Technically this matches "fooidx" and "foopack", but we'd never have + * such names in the first place. + */ + if (!strcmp(idx_name, "idx") && !strcmp(idx_or_pack_name, "pack")) + return 0; + + /* + * This not only checks for a complete match, but also orders based on + * the first non-identical character, which means our ordering will + * match a raw strcmp(). That makes it OK to use this to binary search + * a naively-sorted list. + */ + return strcmp(idx_or_pack_name, idx_name); +} + +int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; @@ -317,7 +349,7 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_name) int cmp; current = m->pack_names[mid]; - cmp = strcmp(idx_name, current); + cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); if (!cmp) return 1; if (cmp > 0) { diff --git a/midx.h b/midx.h index 774f652530..26dd042d63 100644 --- a/midx.h +++ b/midx.h @@ -43,7 +43,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, struct multi_pack_index *m, uint32_t n); int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); -int midx_contains_pack(struct multi_pack_index *m, const char *idx_name); +int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local); int write_midx_file(const char *object_dir); -- cgit v1.2.3 From fc78915674ff1bca1726348d7c434dfa0048a5d7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:06:22 -0400 Subject: packfile: fix pack basename computation When we have a multi-pack-index that covers many packfiles, we try to avoid opening the .idx for those packfiles. To do that we feed the pack name to midx_contains_pack(). But that function wants to see only the basename, which we compute using strrchr() to find the final slash. But that leaves an extra "/" at the start of our string. We can fix this by incrementing the pointer. That also raises the question of what to do when the name does not have a '/' at all. This should generally not happen (we always find files in "pack/"), but it doesn't hurt to be defensive here. Let's wrap all of that up in a helper function and make it publicly available, since a later patch will need to use it, too. The tests don't notice because there's nothing about opening those .idx files that would cause us to give incorrect output. It's just a little slower. The new test checks this case by corrupting the covered .idx, and then making sure we don't complain about it. We also have to tweak t5570, which intentionally corrupts a .idx file and expects us to notice it. When run with GIT_TEST_MULTI_PACK_INDEX, this will fail since we now will (correctly) not bother opening the .idx at all. We can fix that by unconditionally dropping any midx that's there, which ensures we'll have to read the .idx. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- packfile.c | 12 +++++++++++- packfile.h | 6 ++++++ t/t5319-multi-pack-index.sh | 14 ++++++++++++++ t/t5570-git-daemon.sh | 1 + 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/packfile.c b/packfile.c index 6e40bd89c7..7a2dd2fdbe 100644 --- a/packfile.c +++ b/packfile.c @@ -466,6 +466,16 @@ static unsigned int get_max_fd_limit(void) #endif } +const char *pack_basename(struct packed_git *p) +{ + const char *ret = strrchr(p->pack_name, '/'); + if (ret) + ret = ret + 1; /* skip past slash */ + else + ret = p->pack_name; /* we only have a base */ + return ret; +} + /* * Do not call this directly as this leaks p->pack_fd on error return; * call open_packed_git() instead. @@ -482,7 +492,7 @@ static int open_packed_git_1(struct packed_git *p) if (!p->index_data) { struct multi_pack_index *m; - const char *pack_name = strrchr(p->pack_name, '/'); + const char *pack_name = pack_basename(p); for (m = the_repository->objects->multi_pack_index; m; m = m->next) { diff --git a/packfile.h b/packfile.h index ea7a690fc6..fe05fe0303 100644 --- a/packfile.h +++ b/packfile.h @@ -31,6 +31,12 @@ char *sha1_pack_name(const unsigned char *sha1); */ char *sha1_pack_index_name(const unsigned char *sha1); +/* + * Return the basename of the packfile, omitting any containing directory + * (e.g., "pack-1234abcd[...].pack"). + */ +const char *pack_basename(struct packed_git *p); + struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len, diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 8c4d2bd849..1ebf19ec3c 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -117,6 +117,20 @@ test_expect_success 'write midx with one v2 pack' ' compare_results_with_midx "one v2 pack" +test_expect_success 'corrupt idx not opened' ' + idx=$(test-tool read-midx $objdir | grep "\.idx\$") && + mv $objdir/pack/$idx backup-$idx && + test_when_finished "mv backup-\$idx \$objdir/pack/\$idx" && + + # This is the minimum size for a sha-1 based .idx; this lets + # us pass perfunctory tests, but anything that actually opens and reads + # the idx file will complain. + test_copy_bytes 1064 $objdir/pack/$idx && + + git -c core.multiPackIndex=true rev-list --objects --all 2>err && + test_must_be_empty err +' + test_expect_success 'add more objects' ' for i in $(test_seq 6 10) do diff --git a/t/t5570-git-daemon.sh b/t/t5570-git-daemon.sh index 58ee787685..19e271bda6 100755 --- a/t/t5570-git-daemon.sh +++ b/t/t5570-git-daemon.sh @@ -90,6 +90,7 @@ test_expect_success 'fetch notices corrupt pack' ' test_expect_success 'fetch notices corrupt idx' ' cp -R "$GIT_DAEMON_DOCUMENT_ROOT_PATH"/repo_pack.git "$GIT_DAEMON_DOCUMENT_ROOT_PATH"/repo_bad2.git && (cd "$GIT_DAEMON_DOCUMENT_ROOT_PATH"/repo_bad2.git && + rm -f objects/pack/multi-pack-index && p=$(ls objects/pack/pack-*.idx) && chmod u+w $p && printf %0256d 0 | dd of=$p bs=256 count=1 seek=1 conv=notrunc -- cgit v1.2.3 From ddc56d4710fa004c922349407f3de0c3adf90ac9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:12:55 -0400 Subject: http: simplify parsing of remote objects/info/packs We can use skip_prefix() and parse_oid_hex() to continuously increment our pointer, rather than dealing with magic numbers. This also fixes a few small shortcomings: - if we see a line with the right prefix, suffix, and length, i.e. matching /P pack-.{40}.pack\n/, we'll interpret the middle part as hex without checking if it could be parsed. This could lead to us looking at uninitialized garbage in the hash array. In practice this means we'll just make a garbage request to the server which will fail, though it's interesting that a malicious server could convince us to leak 40 bytes of uninitialized stack to them. - the current code is picky about seeing a newline at the end of file, but we can easily be more liberal Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/http.c b/http.c index a32ad36ddf..2ef47bc779 100644 --- a/http.c +++ b/http.c @@ -2147,11 +2147,11 @@ add_pack: int http_get_info_packs(const char *base_url, struct packed_git **packs_head) { struct http_get_options options = {0}; - int ret = 0, i = 0; - char *url, *data; + int ret = 0; + char *url; + const char *data; struct strbuf buf = STRBUF_INIT; - unsigned char hash[GIT_MAX_RAWSZ]; - const unsigned hexsz = the_hash_algo->hexsz; + struct object_id oid; end_url_with_slash(&buf, base_url); strbuf_addstr(&buf, "objects/info/packs"); @@ -2163,24 +2163,17 @@ int http_get_info_packs(const char *base_url, struct packed_git **packs_head) goto cleanup; data = buf.buf; - while (i < buf.len) { - switch (data[i]) { - case 'P': - i++; - if (i + hexsz + 12 <= buf.len && - starts_with(data + i, " pack-") && - starts_with(data + i + hexsz + 6, ".pack\n")) { - get_sha1_hex(data + i + 6, hash); - fetch_and_setup_pack_index(packs_head, hash, - base_url); - i += hexsz + 11; - break; - } - default: - while (i < buf.len && data[i] != '\n') - i++; + while (*data) { + if (skip_prefix(data, "P pack-", &data) && + !parse_oid_hex(data, &oid, &data) && + skip_prefix(data, ".pack", &data) && + (*data == '\n' || *data == '\0')) { + fetch_and_setup_pack_index(packs_head, oid.hash, base_url); + } else { + data = strchrnul(data, '\n'); } - i++; + if (*data) + data++; /* skip past newline */ } cleanup: -- cgit v1.2.3 From b83a3089b584f622054e85b9bacbd18014259b7c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:13:10 -0400 Subject: server-info: fix blind pointer arithmetic When we're writing out a new objects/info/packs file, we read back the old one to try to keep the ordering the same. When we see a line starting with "P", we expect "P pack-1234..." and blindly jump to "line + 2" to parse the pack name. If we saw a line with _just_ "P" and nothing else, we'd jump past the end of the buffer and start reading arbitrary memory. This shouldn't be a big attack vector, as the files are local to the repository and written by us, but it's clearly worth fixing (we do read remote copies of the file for dumb-http fetches, but using a totally different parser!). Let's instead use skip_prefix() here, which avoids pointer arithmetic altogether. Note that this converts our switch statement to an if/else chain, making it slightly more verbose. But it will also make it easier to do a few follow-on cleanups. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- server-info.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/server-info.c b/server-info.c index e2b2d6a27a..b61a6be4c2 100644 --- a/server-info.c +++ b/server-info.c @@ -112,9 +112,9 @@ static struct pack_info *find_pack_by_name(const char *name) /* Returns non-zero when we detect that the info in the * old file is useless. */ -static int parse_pack_def(const char *line, int old_cnt) +static int parse_pack_def(const char *packname, int old_cnt) { - struct pack_info *i = find_pack_by_name(line + 2); + struct pack_info *i = find_pack_by_name(packname); if (i) { i->old_num = old_cnt; return 0; @@ -139,6 +139,7 @@ static int read_pack_info_file(const char *infofile) return 1; /* nonexistent is not an error. */ while (fgets(line, sizeof(line), fp)) { + const char *arg; int len = strlen(line); if (len && line[len-1] == '\n') line[--len] = 0; @@ -146,17 +147,18 @@ static int read_pack_info_file(const char *infofile) if (!len) continue; - switch (line[0]) { - case 'P': /* P name */ - if (parse_pack_def(line, old_cnt++)) + if (skip_prefix(line, "P ", &arg)) { + /* P name */ + if (parse_pack_def(arg, old_cnt++)) goto out_stale; - break; - case 'D': /* we used to emit D but that was misguided. */ - case 'T': /* we used to emit T but nobody uses it. */ + } else if (line[0] == 'D') { + /* we used to emit D but that was misguided. */ goto out_stale; - default: + } else if (line[0] == 'T') { + /* we used to emit T but nobody uses it. */ + goto out_stale; + } else { error("unrecognized: %s", line); - break; } } fclose(fp); -- cgit v1.2.3 From 965cc517e57f01d8013fa7b10c1d5ce72ba3e272 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:13:14 -0400 Subject: server-info: simplify cleanup in parse_pack_def() We have two exits from the function: either we jump to the out_stale label or not. But in both exits we repeat our cleanup, and the only difference is our return value. Let's just use a variable for the return value to avoid repeating ourselves. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- server-info.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server-info.c b/server-info.c index b61a6be4c2..ba44cece7f 100644 --- a/server-info.c +++ b/server-info.c @@ -133,6 +133,7 @@ static int read_pack_info_file(const char *infofile) FILE *fp; char line[1000]; int old_cnt = 0; + int stale = 1; fp = fopen_or_warn(infofile, "r"); if (!fp) @@ -161,11 +162,11 @@ static int read_pack_info_file(const char *infofile) error("unrecognized: %s", line); } } - fclose(fp); - return 0; + stale = 0; + out_stale: fclose(fp); - return 1; + return stale; } static int compare_info(const void *a_, const void *b_) -- cgit v1.2.3 From 4ecbd6492cde3fcf003c387414b3e9983fb2ba4b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:13:56 -0400 Subject: server-info: use strbuf to read old info/packs file This old code uses fgets with a fixed-size buffer. Let's use a strbuf instead, so we don't have to wonder if "1000" is big enough, or what happens if we see a long line. This also lets us drop our custom code to trim the newline. Probably nobody actually cares about the 1000-char limit (after all, the lines generally only say "P pack-[0-9a-f]{40}.pack"), so this is mostly just about cleanup/readability. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- server-info.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/server-info.c b/server-info.c index ba44cece7f..d4115fecbb 100644 --- a/server-info.c +++ b/server-info.c @@ -131,7 +131,7 @@ static int parse_pack_def(const char *packname, int old_cnt) static int read_pack_info_file(const char *infofile) { FILE *fp; - char line[1000]; + struct strbuf line = STRBUF_INIT; int old_cnt = 0; int stale = 1; @@ -139,32 +139,30 @@ static int read_pack_info_file(const char *infofile) if (!fp) return 1; /* nonexistent is not an error. */ - while (fgets(line, sizeof(line), fp)) { + while (strbuf_getline(&line, fp) != EOF) { const char *arg; - int len = strlen(line); - if (len && line[len-1] == '\n') - line[--len] = 0; - if (!len) + if (!line.len) continue; - if (skip_prefix(line, "P ", &arg)) { + if (skip_prefix(line.buf, "P ", &arg)) { /* P name */ if (parse_pack_def(arg, old_cnt++)) goto out_stale; - } else if (line[0] == 'D') { + } else if (line.buf[0] == 'D') { /* we used to emit D but that was misguided. */ goto out_stale; - } else if (line[0] == 'T') { + } else if (line.buf[0] == 'T') { /* we used to emit T but nobody uses it. */ goto out_stale; } else { - error("unrecognized: %s", line); + error("unrecognized: %s", line.buf); } } stale = 0; out_stale: + strbuf_release(&line); fclose(fp); return stale; } -- cgit v1.2.3 From 79bb8b3c80b902f43d2bc7cdf3afa2ffc2668c32 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:14:00 -0400 Subject: server-info: drop nr_alloc struct member We keep an array of struct pointers, with each one representing a single packfile. But for some reason there is a nr_alloc parameter inside each struct, which has never been used. This is probably cruft left over from development, where we might have wanted a nr_alloc to dynamically grow the list. But as it turns out, we do not dynamically grow the list at all, but rather count up the total number of packs and use that as a maximum size. So while we're thinking of this, let's add an assert() that documents (and checks!) that our allocation and fill loops stay in sync. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- server-info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server-info.c b/server-info.c index d4115fecbb..c9fbfd3a51 100644 --- a/server-info.c +++ b/server-info.c @@ -91,7 +91,6 @@ static struct pack_info { struct packed_git *p; int old_num; int new_num; - int nr_alloc; } **info; static int num_pack; static const char *objdir; @@ -213,6 +212,7 @@ static void init_pack_info(const char *infofile, int force) for (i = 0, p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; + assert(i < num_pack); info[i] = xcalloc(1, sizeof(struct pack_info)); info[i]->p = p; info[i]->old_num = -1; -- cgit v1.2.3 From b9fb142eac8053afc43344ef83bca0275d59b438 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:14:04 -0400 Subject: server-info: drop objdirlen pointer arithmetic When writing objects/info/packs, we use the basename of each pack (i.e., just the "pack-1234abcd.pack" part). We compute that manually by adding "objdirlen + 6" to the name. This _should_ work consistently, as we do not include non-local packs, meaning everything should be in $objdir/pack/. Before f13d7db4af (server-info.c: use pack_local like everybody else., 2005-12-05), this was definitely true, since we computed "local" based on comparing the objdir string. Since then, we're relying on the code on packfile.c to match our expectations of p->pack_name and p->local. I think our expectations do still hold today, but we can be a bit more defensive by just using pack_basename() to get the base. That future-proofs us, and should hopefully be more obviously safe to somebody reading the code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- server-info.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/server-info.c b/server-info.c index c9fbfd3a51..ab03c1b3c2 100644 --- a/server-info.c +++ b/server-info.c @@ -93,16 +93,13 @@ static struct pack_info { int new_num; } **info; static int num_pack; -static const char *objdir; -static int objdirlen; static struct pack_info *find_pack_by_name(const char *name) { int i; for (i = 0; i < num_pack; i++) { struct packed_git *p = info[i]->p; - /* skip "/pack/" after ".git/objects" */ - if (!strcmp(p->pack_name + objdirlen + 6, name)) + if (!strcmp(pack_basename(p), name)) return info[i]; } return NULL; @@ -196,9 +193,6 @@ static void init_pack_info(const char *infofile, int force) int stale; int i = 0; - objdir = get_object_directory(); - objdirlen = strlen(objdir); - for (p = get_all_packs(the_repository); p; p = p->next) { /* we ignore things on alternate path since they are * not available to the pullers in general. @@ -246,7 +240,7 @@ static int write_pack_info_file(FILE *fp) { int i; for (i = 0; i < num_pack; i++) { - if (fprintf(fp, "P %s\n", info[i]->p->pack_name + objdirlen + 6) < 0) + if (fprintf(fp, "P %s\n", pack_basename(info[i]->p)) < 0) return -1; } if (fputc('\n', fp) == EOF) -- cgit v1.2.3 From b3223761c8670bdfb667e39c78b6d32a7aa4cb80 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 5 Apr 2019 14:14:07 -0400 Subject: update_info_refs(): drop unused force parameter Once upon a time the force flag meant something when writing info/refs, but it hasn't done anything since 60d0526aaa (Unoptimize info/refs creation., 2005-09-14). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- server-info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server-info.c b/server-info.c index ab03c1b3c2..41274d098b 100644 --- a/server-info.c +++ b/server-info.c @@ -78,7 +78,7 @@ static int generate_info_refs(FILE *fp) return for_each_ref(add_info_ref, fp); } -static int update_info_refs(int force) +static int update_info_refs(void) { char *path = git_pathdup("info/refs"); int ret = update_info_file(path, generate_info_refs); @@ -269,7 +269,7 @@ int update_server_info(int force) */ int errs = 0; - errs = errs | update_info_refs(force); + errs = errs | update_info_refs(); errs = errs | update_info_packs(force); /* remove leftover rev-cache file if there is any */ -- cgit v1.2.3