diff options
-rw-r--r-- | Documentation/git-cat-file.txt | 5 | ||||
-rw-r--r-- | builtin/cat-file.c | 51 | ||||
-rw-r--r-- | cache.h | 2 | ||||
-rw-r--r-- | sha1_file.c | 128 | ||||
-rwxr-xr-x | t/t1006-cat-file.sh | 45 |
5 files changed, 188 insertions, 43 deletions
diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt index f6a16f4300..499ae7b98a 100644 --- a/Documentation/git-cat-file.txt +++ b/Documentation/git-cat-file.txt @@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec SYNOPSIS -------- [verse] -'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object> +'git cat-file' (-t [--allow-unknown-type]| -s [--allow-unknown-type]| -e | -p | <type> | --textconv ) <object> 'git cat-file' (--batch | --batch-check) < <list-of-objects> DESCRIPTION @@ -69,6 +69,9 @@ OPTIONS not be combined with any other options or arguments. See the section `BATCH OUTPUT` below for details. +--allow-unknown-type:: + Allow -s or -t to query broken/corrupt objects of unknown type. + OUTPUT ------ If '-t' is specified, one of the <type>. diff --git a/builtin/cat-file.c b/builtin/cat-file.c index df99df4db1..ecb488822f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -9,13 +9,20 @@ #include "userdiff.h" #include "streaming.h" -static int cat_one_file(int opt, const char *exp_type, const char *obj_name) +static int cat_one_file(int opt, const char *exp_type, const char *obj_name, + int unknown_type) { unsigned char sha1[20]; enum object_type type; char *buf; unsigned long size; struct object_context obj_context; + struct object_info oi = {NULL}; + struct strbuf sb = STRBUF_INIT; + unsigned flags = LOOKUP_REPLACE_OBJECT; + + if (unknown_type) + flags |= LOOKUP_UNKNOWN_OBJECT; if (get_sha1_with_context(obj_name, 0, sha1, &obj_context)) die("Not a valid object name %s", obj_name); @@ -23,20 +30,22 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) buf = NULL; switch (opt) { case 't': - type = sha1_object_info(sha1, NULL); - if (type > 0) { - printf("%s\n", typename(type)); + oi.typename = &sb; + if (sha1_object_info_extended(sha1, &oi, flags) < 0) + die("git cat-file: could not get object info"); + if (sb.len) { + printf("%s\n", sb.buf); + strbuf_release(&sb); return 0; } break; case 's': - type = sha1_object_info(sha1, &size); - if (type > 0) { - printf("%lu\n", size); - return 0; - } - break; + oi.sizep = &size; + if (sha1_object_info_extended(sha1, &oi, flags) < 0) + die("git cat-file: could not get object info"); + printf("%lu\n", size); + return 0; case 'e': return !has_sha1_file(sha1); @@ -323,7 +332,7 @@ static int batch_objects(struct batch_options *opt) } static const char * const cat_file_usage[] = { - N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"), + N_("git cat-file (-t [--allow-unknown-type]|-s [--allow-unknown-type]|-e|-p|<type>|--textconv) <object>"), N_("git cat-file (--batch | --batch-check) < <list-of-objects>"), NULL }; @@ -359,16 +368,19 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) int opt = 0; const char *exp_type = NULL, *obj_name = NULL; struct batch_options batch = {0}; + int unknown_type = 0; const struct option options[] = { OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")), - OPT_SET_INT('t', NULL, &opt, N_("show object type"), 't'), - OPT_SET_INT('s', NULL, &opt, N_("show object size"), 's'), - OPT_SET_INT('e', NULL, &opt, + OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'), + OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'), + OPT_CMDMODE('e', NULL, &opt, N_("exit with zero when there's no error"), 'e'), - OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'), - OPT_SET_INT(0, "textconv", &opt, + OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'), + OPT_CMDMODE(0, "textconv", &opt, N_("for blob objects, run textconv on object's content"), 'c'), + OPT_BOOL( 0, "allow-unknown-type", &unknown_type, + N_("allow -s and -t to work with broken/corrupt objects")), { OPTION_CALLBACK, 0, "batch", &batch, "format", N_("show info and content of objects fed from the standard input"), PARSE_OPT_OPTARG, batch_option_callback }, @@ -380,9 +392,6 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) git_config(git_cat_file_config, NULL); - if (argc != 3 && argc != 2) - usage_with_options(cat_file_usage, options); - argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0); if (opt) { @@ -405,5 +414,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) if (batch.enabled) return batch_objects(&batch); - return cat_one_file(opt, exp_type, obj_name); + if (unknown_type && opt != 't' && opt != 's') + die("git cat-file --allow-unknown-type: use with -s or -t"); + return cat_one_file(opt, exp_type, obj_name, unknown_type); } @@ -879,6 +879,7 @@ extern char *xdg_config_home(const char *filename); /* object replacement */ #define LOOKUP_REPLACE_OBJECT 1 +#define LOOKUP_UNKNOWN_OBJECT 2 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { @@ -1351,6 +1352,7 @@ struct object_info { unsigned long *sizep; unsigned long *disk_sizep; unsigned char *delta_base_sha1; + struct strbuf *typename; /* Response */ enum { diff --git a/sha1_file.c b/sha1_file.c index 866021b2b1..ccc6dac54b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1564,6 +1564,40 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma return git_inflate(stream, 0); } +static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map, + unsigned long mapsize, void *buffer, + unsigned long bufsiz, struct strbuf *header) +{ + int status; + + status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz); + + /* + * Check if entire header is unpacked in the first iteration. + */ + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; + + /* + * buffer[0..bufsiz] was not large enough. Copy the partial + * result out to header, and then append the result of further + * reading the stream. + */ + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + stream->next_out = buffer; + stream->avail_out = bufsiz; + + do { + status = git_inflate(stream, 0); + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; + stream->next_out = buffer; + stream->avail_out = bufsiz; + } while (status != Z_STREAM_END); + return -1; +} + static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) { int bytes = strlen(buffer) + 1; @@ -1614,27 +1648,38 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_sha1_header(const char *hdr, unsigned long *sizep) +static int parse_sha1_header_extended(const char *hdr, struct object_info *oi, + unsigned int flags) { - char type[10]; - int i; + const char *type_buf = hdr; unsigned long size; + int type, type_len = 0; /* - * The type can be at most ten bytes (including the - * terminating '\0' that we add), and is followed by + * The type can be of any size but is followed by * a space. */ - i = 0; for (;;) { char c = *hdr++; if (c == ' ') break; - type[i++] = c; - if (i >= sizeof(type)) - return -1; + type_len++; } - type[i] = 0; + + type = type_from_string_gently(type_buf, type_len, 1); + if (oi->typename) + strbuf_add(oi->typename, type_buf, type_len); + /* + * Set type to 0 if its an unknown object and + * we're obtaining the type using '--allow-unkown-type' + * option. + */ + if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0)) + type = 0; + else if (type < 0) + die("invalid object type"); + if (oi->typep) + *oi->typep = type; /* * The length must follow immediately, and be in canonical @@ -1652,12 +1697,24 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep) size = size * 10 + c; } } - *sizep = size; + + if (oi->sizep) + *oi->sizep = size; /* * The length must be followed by a zero byte */ - return *hdr ? -1 : type_from_string(type); + return *hdr ? -1 : type; +} + +int parse_sha1_header(const char *hdr, unsigned long *sizep) +{ + struct object_info oi; + + oi.sizep = sizep; + oi.typename = NULL; + oi.typep = NULL; + return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT); } static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1) @@ -2522,13 +2579,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1, } static int sha1_loose_object_info(const unsigned char *sha1, - struct object_info *oi) + struct object_info *oi, + int flags) { - int status; - unsigned long mapsize, size; + int status = 0; + unsigned long mapsize; void *map; git_zstream stream; char hdr[32]; + struct strbuf hdrbuf = STRBUF_INIT; if (oi->delta_base_sha1) hashclr(oi->delta_base_sha1); @@ -2541,7 +2600,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, * return value implicitly indicates whether the * object even exists. */ - if (!oi->typep && !oi->sizep) { + if (!oi->typep && !oi->typename && !oi->sizep) { struct stat st; if (stat_sha1_file(sha1, &st) < 0) return -1; @@ -2555,17 +2614,26 @@ static int sha1_loose_object_info(const unsigned char *sha1, return -1; if (oi->disk_sizep) *oi->disk_sizep = mapsize; - if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) + if ((flags & LOOKUP_UNKNOWN_OBJECT)) { + if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0) + status = error("unable to unpack %s header with --allow-unknown-type", + sha1_to_hex(sha1)); + } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) status = error("unable to unpack %s header", sha1_to_hex(sha1)); - else if ((status = parse_sha1_header(hdr, &size)) < 0) + if (status < 0) + ; /* Do nothing */ + else if (hdrbuf.len) { + if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0) + status = error("unable to parse %s header with --allow-unknown-type", + sha1_to_hex(sha1)); + } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0) status = error("unable to parse %s header", sha1_to_hex(sha1)); - else if (oi->sizep) - *oi->sizep = size; git_inflate_end(&stream); munmap(map, mapsize); - if (oi->typep) + if (status && oi->typep) *oi->typep = status; + strbuf_release(&hdrbuf); return 0; } @@ -2574,6 +2642,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, struct cached_object *co; struct pack_entry e; int rtype; + enum object_type real_type; const unsigned char *real = lookup_replace_object_extended(sha1, flags); co = find_cached_object(real); @@ -2586,13 +2655,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, *(oi->disk_sizep) = 0; if (oi->delta_base_sha1) hashclr(oi->delta_base_sha1); + if (oi->typename) + strbuf_addstr(oi->typename, typename(co->type)); oi->whence = OI_CACHED; return 0; } if (!find_pack_entry(real, &e)) { /* Most likely it's a loose object. */ - if (!sha1_loose_object_info(real, oi)) { + if (!sha1_loose_object_info(real, oi, flags)) { oi->whence = OI_LOOSE; return 0; } @@ -2603,9 +2674,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, return -1; } + /* + * packed_object_info() does not follow the delta chain to + * find out the real type, unless it is given oi->typep. + */ + if (oi->typename && !oi->typep) + oi->typep = &real_type; + rtype = packed_object_info(e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, real); + if (oi->typep == &real_type) + oi->typep = NULL; return sha1_object_info_extended(real, oi, 0); } else if (in_delta_base_cache(e.p, e.offset)) { oi->whence = OI_DBCACHED; @@ -2616,6 +2696,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || rtype == OBJ_OFS_DELTA); } + if (oi->typename) + strbuf_addstr(oi->typename, typename(*oi->typep)); + if (oi->typep == &real_type) + oi->typep = NULL; return 0; } diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index ab36b1eb72..4f225db9a7 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -47,6 +47,18 @@ $content" test_cmp expect actual ' + test_expect_success "Type of $type is correct using --allow-unknown-type" ' + echo $type >expect && + git cat-file -t --allow-unknown-type $sha1 >actual && + test_cmp expect actual + ' + + test_expect_success "Size of $type is correct using --allow-unknown-type" ' + echo $size >expect && + git cat-file -s --allow-unknown-type $sha1 >actual && + test_cmp expect actual + ' + test -z "$content" || test_expect_success "Content of $type is correct" ' maybe_remove_timestamp "$content" $no_ts >expect && @@ -296,4 +308,37 @@ test_expect_success '%(deltabase) reports packed delta bases' ' } ' +bogus_type="bogus" +bogus_content="bogus" +bogus_size=$(strlen "$bogus_content") +bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin) + +test_expect_success "Type of broken object is correct" ' + echo $bogus_type >expect && + git cat-file -t --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' + +test_expect_success "Size of broken object is correct" ' + echo $bogus_size >expect && + git cat-file -s --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' +bogus_type="abcdefghijklmnopqrstuvwxyz1234679" +bogus_content="bogus" +bogus_size=$(strlen "$bogus_content") +bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin) + +test_expect_success "Type of broken object is correct when type is large" ' + echo $bogus_type >expect && + git cat-file -t --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' + +test_expect_success "Size of large broken object is correct when type is large" ' + echo $bogus_size >expect && + git cat-file -s --allow-unknown-type $bogus_sha1 >actual && + test_cmp expect actual +' + test_done |