summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2015-05-19 13:17:58 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2015-05-19 13:17:58 -0700
commit3b7d373ae2618690170f93bd19883915851de903 (patch)
treec95dbebc54fc60a920479b96836b3f35e245ae9f
parentMerge branch 'nd/dwim-wildcards-as-pathspecs' (diff)
parentt1006: add tests for git cat-file --allow-unknown-type (diff)
downloadtgif-3b7d373ae2618690170f93bd19883915851de903.tar.xz
Merge branch 'kn/cat-file-literally'
Add the "--allow-unknown-type" option to "cat-file" to allow inspecting loose objects of an experimental or a broken type. * kn/cat-file-literally: t1006: add tests for git cat-file --allow-unknown-type cat-file: teach cat-file a '--allow-unknown-type' option cat-file: make the options mutually exclusive sha1_file: support reading from a loose object of unknown type
-rw-r--r--Documentation/git-cat-file.txt5
-rw-r--r--builtin/cat-file.c51
-rw-r--r--cache.h2
-rw-r--r--sha1_file.c128
-rwxr-xr-xt/t1006-cat-file.sh45
5 files changed, 188 insertions, 43 deletions
diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index f6a16f4300..499ae7b98a 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -9,7 +9,7 @@ git-cat-file - Provide content or type and size information for repository objec
SYNOPSIS
--------
[verse]
-'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
+'git cat-file' (-t [--allow-unknown-type]| -s [--allow-unknown-type]| -e | -p | <type> | --textconv ) <object>
'git cat-file' (--batch | --batch-check) < <list-of-objects>
DESCRIPTION
@@ -69,6 +69,9 @@ OPTIONS
not be combined with any other options or arguments. See the
section `BATCH OUTPUT` below for details.
+--allow-unknown-type::
+ Allow -s or -t to query broken/corrupt objects of unknown type.
+
OUTPUT
------
If '-t' is specified, one of the <type>.
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4db1..ecb488822f 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -9,13 +9,20 @@
#include "userdiff.h"
#include "streaming.h"
-static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
+static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
+ int unknown_type)
{
unsigned char sha1[20];
enum object_type type;
char *buf;
unsigned long size;
struct object_context obj_context;
+ struct object_info oi = {NULL};
+ struct strbuf sb = STRBUF_INIT;
+ unsigned flags = LOOKUP_REPLACE_OBJECT;
+
+ if (unknown_type)
+ flags |= LOOKUP_UNKNOWN_OBJECT;
if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
die("Not a valid object name %s", obj_name);
@@ -23,20 +30,22 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
buf = NULL;
switch (opt) {
case 't':
- type = sha1_object_info(sha1, NULL);
- if (type > 0) {
- printf("%s\n", typename(type));
+ oi.typename = &sb;
+ if (sha1_object_info_extended(sha1, &oi, flags) < 0)
+ die("git cat-file: could not get object info");
+ if (sb.len) {
+ printf("%s\n", sb.buf);
+ strbuf_release(&sb);
return 0;
}
break;
case 's':
- type = sha1_object_info(sha1, &size);
- if (type > 0) {
- printf("%lu\n", size);
- return 0;
- }
- break;
+ oi.sizep = &size;
+ if (sha1_object_info_extended(sha1, &oi, flags) < 0)
+ die("git cat-file: could not get object info");
+ printf("%lu\n", size);
+ return 0;
case 'e':
return !has_sha1_file(sha1);
@@ -323,7 +332,7 @@ static int batch_objects(struct batch_options *opt)
}
static const char * const cat_file_usage[] = {
- N_("git cat-file (-t | -s | -e | -p | <type> | --textconv) <object>"),
+ N_("git cat-file (-t [--allow-unknown-type]|-s [--allow-unknown-type]|-e|-p|<type>|--textconv) <object>"),
N_("git cat-file (--batch | --batch-check) < <list-of-objects>"),
NULL
};
@@ -359,16 +368,19 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
int opt = 0;
const char *exp_type = NULL, *obj_name = NULL;
struct batch_options batch = {0};
+ int unknown_type = 0;
const struct option options[] = {
OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
- OPT_SET_INT('t', NULL, &opt, N_("show object type"), 't'),
- OPT_SET_INT('s', NULL, &opt, N_("show object size"), 's'),
- OPT_SET_INT('e', NULL, &opt,
+ OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'),
+ OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'),
+ OPT_CMDMODE('e', NULL, &opt,
N_("exit with zero when there's no error"), 'e'),
- OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
- OPT_SET_INT(0, "textconv", &opt,
+ OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
+ OPT_CMDMODE(0, "textconv", &opt,
N_("for blob objects, run textconv on object's content"), 'c'),
+ OPT_BOOL( 0, "allow-unknown-type", &unknown_type,
+ N_("allow -s and -t to work with broken/corrupt objects")),
{ OPTION_CALLBACK, 0, "batch", &batch, "format",
N_("show info and content of objects fed from the standard input"),
PARSE_OPT_OPTARG, batch_option_callback },
@@ -380,9 +392,6 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
git_config(git_cat_file_config, NULL);
- if (argc != 3 && argc != 2)
- usage_with_options(cat_file_usage, options);
-
argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
if (opt) {
@@ -405,5 +414,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
if (batch.enabled)
return batch_objects(&batch);
- return cat_one_file(opt, exp_type, obj_name);
+ if (unknown_type && opt != 't' && opt != 's')
+ die("git cat-file --allow-unknown-type: use with -s or -t");
+ return cat_one_file(opt, exp_type, obj_name, unknown_type);
}
diff --git a/cache.h b/cache.h
index 54f108a28f..c97d8071e1 100644
--- a/cache.h
+++ b/cache.h
@@ -879,6 +879,7 @@ extern char *xdg_config_home(const char *filename);
/* object replacement */
#define LOOKUP_REPLACE_OBJECT 1
+#define LOOKUP_UNKNOWN_OBJECT 2
extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
{
@@ -1351,6 +1352,7 @@ struct object_info {
unsigned long *sizep;
unsigned long *disk_sizep;
unsigned char *delta_base_sha1;
+ struct strbuf *typename;
/* Response */
enum {
diff --git a/sha1_file.c b/sha1_file.c
index 866021b2b1..ccc6dac54b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1564,6 +1564,40 @@ int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long ma
return git_inflate(stream, 0);
}
+static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
+ unsigned long mapsize, void *buffer,
+ unsigned long bufsiz, struct strbuf *header)
+{
+ int status;
+
+ status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
+
+ /*
+ * Check if entire header is unpacked in the first iteration.
+ */
+ if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
+ return 0;
+
+ /*
+ * buffer[0..bufsiz] was not large enough. Copy the partial
+ * result out to header, and then append the result of further
+ * reading the stream.
+ */
+ strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
+ stream->next_out = buffer;
+ stream->avail_out = bufsiz;
+
+ do {
+ status = git_inflate(stream, 0);
+ strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
+ if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
+ return 0;
+ stream->next_out = buffer;
+ stream->avail_out = bufsiz;
+ } while (status != Z_STREAM_END);
+ return -1;
+}
+
static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
{
int bytes = strlen(buffer) + 1;
@@ -1614,27 +1648,38 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s
* too permissive for what we want to check. So do an anal
* object header parse by hand.
*/
-int parse_sha1_header(const char *hdr, unsigned long *sizep)
+static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
+ unsigned int flags)
{
- char type[10];
- int i;
+ const char *type_buf = hdr;
unsigned long size;
+ int type, type_len = 0;
/*
- * The type can be at most ten bytes (including the
- * terminating '\0' that we add), and is followed by
+ * The type can be of any size but is followed by
* a space.
*/
- i = 0;
for (;;) {
char c = *hdr++;
if (c == ' ')
break;
- type[i++] = c;
- if (i >= sizeof(type))
- return -1;
+ type_len++;
}
- type[i] = 0;
+
+ type = type_from_string_gently(type_buf, type_len, 1);
+ if (oi->typename)
+ strbuf_add(oi->typename, type_buf, type_len);
+ /*
+ * Set type to 0 if its an unknown object and
+ * we're obtaining the type using '--allow-unkown-type'
+ * option.
+ */
+ if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
+ type = 0;
+ else if (type < 0)
+ die("invalid object type");
+ if (oi->typep)
+ *oi->typep = type;
/*
* The length must follow immediately, and be in canonical
@@ -1652,12 +1697,24 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep)
size = size * 10 + c;
}
}
- *sizep = size;
+
+ if (oi->sizep)
+ *oi->sizep = size;
/*
* The length must be followed by a zero byte
*/
- return *hdr ? -1 : type_from_string(type);
+ return *hdr ? -1 : type;
+}
+
+int parse_sha1_header(const char *hdr, unsigned long *sizep)
+{
+ struct object_info oi;
+
+ oi.sizep = sizep;
+ oi.typename = NULL;
+ oi.typep = NULL;
+ return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
}
static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@@ -2522,13 +2579,15 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
}
static int sha1_loose_object_info(const unsigned char *sha1,
- struct object_info *oi)
+ struct object_info *oi,
+ int flags)
{
- int status;
- unsigned long mapsize, size;
+ int status = 0;
+ unsigned long mapsize;
void *map;
git_zstream stream;
char hdr[32];
+ struct strbuf hdrbuf = STRBUF_INIT;
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
@@ -2541,7 +2600,7 @@ static int sha1_loose_object_info(const unsigned char *sha1,
* return value implicitly indicates whether the
* object even exists.
*/
- if (!oi->typep && !oi->sizep) {
+ if (!oi->typep && !oi->typename && !oi->sizep) {
struct stat st;
if (stat_sha1_file(sha1, &st) < 0)
return -1;
@@ -2555,17 +2614,26 @@ static int sha1_loose_object_info(const unsigned char *sha1,
return -1;
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
- if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
+ if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
+ if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
+ status = error("unable to unpack %s header with --allow-unknown-type",
+ sha1_to_hex(sha1));
+ } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
status = error("unable to unpack %s header",
sha1_to_hex(sha1));
- else if ((status = parse_sha1_header(hdr, &size)) < 0)
+ if (status < 0)
+ ; /* Do nothing */
+ else if (hdrbuf.len) {
+ if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
+ status = error("unable to parse %s header with --allow-unknown-type",
+ sha1_to_hex(sha1));
+ } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
status = error("unable to parse %s header", sha1_to_hex(sha1));
- else if (oi->sizep)
- *oi->sizep = size;
git_inflate_end(&stream);
munmap(map, mapsize);
- if (oi->typep)
+ if (status && oi->typep)
*oi->typep = status;
+ strbuf_release(&hdrbuf);
return 0;
}
@@ -2574,6 +2642,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
struct cached_object *co;
struct pack_entry e;
int rtype;
+ enum object_type real_type;
const unsigned char *real = lookup_replace_object_extended(sha1, flags);
co = find_cached_object(real);
@@ -2586,13 +2655,15 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
*(oi->disk_sizep) = 0;
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
+ if (oi->typename)
+ strbuf_addstr(oi->typename, typename(co->type));
oi->whence = OI_CACHED;
return 0;
}
if (!find_pack_entry(real, &e)) {
/* Most likely it's a loose object. */
- if (!sha1_loose_object_info(real, oi)) {
+ if (!sha1_loose_object_info(real, oi, flags)) {
oi->whence = OI_LOOSE;
return 0;
}
@@ -2603,9 +2674,18 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
return -1;
}
+ /*
+ * packed_object_info() does not follow the delta chain to
+ * find out the real type, unless it is given oi->typep.
+ */
+ if (oi->typename && !oi->typep)
+ oi->typep = &real_type;
+
rtype = packed_object_info(e.p, e.offset, oi);
if (rtype < 0) {
mark_bad_packed_object(e.p, real);
+ if (oi->typep == &real_type)
+ oi->typep = NULL;
return sha1_object_info_extended(real, oi, 0);
} else if (in_delta_base_cache(e.p, e.offset)) {
oi->whence = OI_DBCACHED;
@@ -2616,6 +2696,10 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
rtype == OBJ_OFS_DELTA);
}
+ if (oi->typename)
+ strbuf_addstr(oi->typename, typename(*oi->typep));
+ if (oi->typep == &real_type)
+ oi->typep = NULL;
return 0;
}
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index ab36b1eb72..4f225db9a7 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -47,6 +47,18 @@ $content"
test_cmp expect actual
'
+ test_expect_success "Type of $type is correct using --allow-unknown-type" '
+ echo $type >expect &&
+ git cat-file -t --allow-unknown-type $sha1 >actual &&
+ test_cmp expect actual
+ '
+
+ test_expect_success "Size of $type is correct using --allow-unknown-type" '
+ echo $size >expect &&
+ git cat-file -s --allow-unknown-type $sha1 >actual &&
+ test_cmp expect actual
+ '
+
test -z "$content" ||
test_expect_success "Content of $type is correct" '
maybe_remove_timestamp "$content" $no_ts >expect &&
@@ -296,4 +308,37 @@ test_expect_success '%(deltabase) reports packed delta bases' '
}
'
+bogus_type="bogus"
+bogus_content="bogus"
+bogus_size=$(strlen "$bogus_content")
+bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin)
+
+test_expect_success "Type of broken object is correct" '
+ echo $bogus_type >expect &&
+ git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success "Size of broken object is correct" '
+ echo $bogus_size >expect &&
+ git cat-file -s --allow-unknown-type $bogus_sha1 >actual &&
+ test_cmp expect actual
+'
+bogus_type="abcdefghijklmnopqrstuvwxyz1234679"
+bogus_content="bogus"
+bogus_size=$(strlen "$bogus_content")
+bogus_sha1=$(echo_without_newline "$bogus_content" | git hash-object -t $bogus_type --literally -w --stdin)
+
+test_expect_success "Type of broken object is correct when type is large" '
+ echo $bogus_type >expect &&
+ git cat-file -t --allow-unknown-type $bogus_sha1 >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success "Size of large broken object is correct when type is large" '
+ echo $bogus_size >expect &&
+ git cat-file -s --allow-unknown-type $bogus_sha1 >actual &&
+ test_cmp expect actual
+'
+
test_done