From ad42f28d0cc72676de8ff8439c513125a1aa9e84 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 22 Jun 2015 06:40:56 -0400 Subject: cat-file: minor style fix in options list We do not put extra whitespace before the first macro argument. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 049a95f1f1..6cbccccb41 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -412,7 +412,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'), OPT_CMDMODE(0, "textconv", &opt, N_("for blob objects, run textconv on object's content"), 'c'), - OPT_BOOL( 0, "allow-unknown-type", &unknown_type, + OPT_BOOL(0, "allow-unknown-type", &unknown_type, N_("allow -s and -t to work with broken/corrupt objects")), { OPTION_CALLBACK, 0, "batch", &batch, "format", N_("show info and content of objects fed from the standard input"), -- cgit v1.2.3 From bfd155943eecedb7b7e759ff022ca09e68f941b0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 22 Jun 2015 06:41:03 -0400 Subject: cat-file: move batch_options definition to top of file That way all of the functions can make use of it. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 6cbccccb41..d4101b7ada 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -10,6 +10,13 @@ #include "streaming.h" #include "tree-walk.h" +struct batch_options { + int enabled; + int follow_symlinks; + int print_contents; + const char *format; +}; + static int cat_one_file(int opt, const char *exp_type, const char *obj_name, int unknown_type) { @@ -232,12 +239,6 @@ static void print_object_or_die(int fd, struct expand_data *data) } } -struct batch_options { - int enabled; - int follow_symlinks; - int print_contents; - const char *format; -}; static int batch_one_object(const char *obj_name, struct batch_options *opt, struct expand_data *data) -- cgit v1.2.3 From fc4937c37219347f4e2c25a271577b333942453f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 22 Jun 2015 06:45:17 -0400 Subject: cat-file: add --buffer option We use a direct write() to output the results of --batch and --batch-check. This is good for processes feeding the input and reading the output interactively, but it introduces measurable overhead if you do not want this feature. 
For example, on linux.git: $ git rev-list --objects --all | cut -d' ' -f1 >objects $ time git cat-file --batch-check='%(objectsize)' \ <objects >/dev/null real 0m5.440s user 0m5.060s sys 0m0.384s This patch adds an option to use regular stdio buffering: $ time git cat-file --batch-check='%(objectsize)' \ --buffer <objects >/dev/null real 0m4.975s user 0m4.888s sys 0m0.092s Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index d4101b7ada..741e100bda 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -14,6 +14,7 @@ struct batch_options { int enabled; int follow_symlinks; int print_contents; + int buffer_output; const char *format; }; @@ -211,14 +212,25 @@ static size_t expand_format(struct strbuf *sb, const char *start, void *data) return end - start + 1; } -static void print_object_or_die(int fd, struct expand_data *data) +static void batch_write(struct batch_options *opt, const void *data, int len) +{ + if (opt->buffer_output) { + if (fwrite(data, 1, len, stdout) != len) + die_errno("unable to write to stdout"); + } else + write_or_die(1, data, len); +} + +static void print_object_or_die(struct batch_options *opt, struct expand_data *data) { const unsigned char *sha1 = data->sha1; assert(data->info.typep); if (data->type == OBJ_BLOB) { - if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0) + if (opt->buffer_output) + fflush(stdout); + if (stream_blob_to_fd(1, sha1, NULL, 0) < 0) die("unable to stream %s to stdout", sha1_to_hex(sha1)); } else { @@ -234,12 +246,11 @@ static void print_object_or_die(int fd, struct expand_data *data) if (data->info.sizep && size != data->size) die("object %s changed size!?", sha1_to_hex(sha1)); - write_or_die(fd, contents, size); + batch_write(opt, contents, size); free(contents); } } - static int batch_one_object(const char *obj_name, struct batch_options *opt, struct 
expand_data *data) { @@ -294,12 +305,12 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt, strbuf_expand(&buf, opt->format, expand_format, data); strbuf_addch(&buf, '\n'); - write_or_die(1, buf.buf, buf.len); + batch_write(opt, buf.buf, buf.len); strbuf_release(&buf); if (opt->print_contents) { - print_object_or_die(1, data); - write_or_die(1, "\n", 1); + print_object_or_die(opt, data); + batch_write(opt, "\n", 1); } return 0; } @@ -415,6 +426,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) N_("for blob objects, run textconv on object's content"), 'c'), OPT_BOOL(0, "allow-unknown-type", &unknown_type, N_("allow -s and -t to work with broken/corrupt objects")), + OPT_BOOL(0, "buffer", &batch.buffer_output, N_("buffer --batch output")), { OPTION_CALLBACK, 0, "batch", &batch, "format", N_("show info and content of objects fed from the standard input"), PARSE_OPT_OPTARG, batch_option_callback }, -- cgit v1.2.3 From 82330950d96a2c2b971ec5b29f59625bcfb62d47 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 22 Jun 2015 06:45:33 -0400 Subject: cat-file: stop returning value from batch_one_object If batch_one_object returns an error code, we stop reading input. However, it will only do so if we feed it NULL, which cannot happen; we give it the "buf" member of a strbuf, which is always non-NULL. We did originally stop on other errors (like a missing object), but this was changed in 3c076db (cat-file --batch / --batch-check: do not exit if hashes are missing, 2008-06-09). These days we keep going for any per-object error (and print "missing" when necessary). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 741e100bda..7d99c157a1 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -251,17 +251,14 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d } } -static int batch_one_object(const char *obj_name, struct batch_options *opt, - struct expand_data *data) +static void batch_one_object(const char *obj_name, struct batch_options *opt, + struct expand_data *data) { struct strbuf buf = STRBUF_INIT; struct object_context ctx; int flags = opt->follow_symlinks ? GET_SHA1_FOLLOW_SYMLINKS : 0; enum follow_symlinks_result result; - if (!obj_name) - return 1; - result = get_sha1_with_context(obj_name, flags, data->sha1, &ctx); if (result != FOUND) { switch (result) { @@ -286,7 +283,7 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt, break; } fflush(stdout); - return 0; + return; } if (ctx.mode == 0) { @@ -294,13 +291,13 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt, (uintmax_t)ctx.symlink_path.len, ctx.symlink_path.buf); fflush(stdout); - return 0; + return; } if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { printf("%s missing\n", obj_name); fflush(stdout); - return 0; + return; } strbuf_expand(&buf, opt->format, expand_format, data); @@ -312,7 +309,6 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt, print_object_or_die(opt, data); batch_write(opt, "\n", 1); } - return 0; } static int batch_objects(struct batch_options *opt) @@ -367,9 +363,7 @@ static int batch_objects(struct batch_options *opt) data.rest = p; } - retval = batch_one_object(buf.buf, opt, &data); - if (retval) - break; + batch_one_object(buf.buf, opt, &data); } strbuf_release(&buf); -- cgit v1.2.3 From 
44b877e9bc944258db096a0ec57151be7c8cbf66 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 22 Jun 2015 06:45:41 -0400 Subject: cat-file: split batch_one_object into two stages There are really two things going on in this function: 1. We convert the name we got on stdin to a sha1. 2. We look up and print information on the sha1. Let's split out the second half so that we can call it separately. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 7d99c157a1..499ccda6b6 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -251,10 +251,31 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d } } +static void batch_object_write(const char *obj_name, struct batch_options *opt, + struct expand_data *data) +{ + struct strbuf buf = STRBUF_INIT; + + if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { + printf("%s missing\n", obj_name); + fflush(stdout); + return; + } + + strbuf_expand(&buf, opt->format, expand_format, data); + strbuf_addch(&buf, '\n'); + batch_write(opt, buf.buf, buf.len); + strbuf_release(&buf); + + if (opt->print_contents) { + print_object_or_die(opt, data); + batch_write(opt, "\n", 1); + } +} + static void batch_one_object(const char *obj_name, struct batch_options *opt, struct expand_data *data) { - struct strbuf buf = STRBUF_INIT; struct object_context ctx; int flags = opt->follow_symlinks ? 
GET_SHA1_FOLLOW_SYMLINKS : 0; enum follow_symlinks_result result; @@ -294,21 +315,7 @@ static void batch_one_object(const char *obj_name, struct batch_options *opt, return; } - if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { - printf("%s missing\n", obj_name); - fflush(stdout); - return; - } - - strbuf_expand(&buf, opt->format, expand_format, data); - strbuf_addch(&buf, '\n'); - batch_write(opt, buf.buf, buf.len); - strbuf_release(&buf); - - if (opt->print_contents) { - print_object_or_die(opt, data); - batch_write(opt, "\n", 1); - } + batch_object_write(obj_name, opt, data); } static int batch_objects(struct batch_options *opt) -- cgit v1.2.3 From 6a951937ae1abb5fe438bfb41ebb28c5abe0419d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 22 Jun 2015 06:45:59 -0400 Subject: cat-file: add --batch-all-objects option It can sometimes be useful to examine all objects in the repository. Normally this is done with "git rev-list --all --objects", but: 1. That shows only reachable objects. You may want to look at all available objects. 2. It's slow. We actually open each object to walk the graph. If your operation is OK with seeing unreachable objects, it's an order of magnitude faster to just enumerate the loose directories and pack indices. You can do this yourself using "ls" and "git show-index", but it's non-obvious. This patch adds an option to "cat-file --batch-check" to operate on all available objects (rather than reading names from stdin). This is based on a proposal by Charles Bailey to provide a separate "git list-all-objects" command. That is more orthogonal, as it splits enumerating the objects from getting information about them. However, in practice you will either: a. Feed the list of objects directly into cat-file anyway, so you can find out information about them. Keeping it in a single process is more efficient. b. 
Ask the listing process to start telling you more information about the objects, in which case you will reinvent cat-file's batch-check formatter. Adding a cat-file option is simple and efficient. And if you really do want just the object names, you can always do: git cat-file --batch-check='%(objectname)' --batch-all-objects Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 499ccda6b6..95604c4a63 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -15,6 +15,7 @@ struct batch_options { int follow_symlinks; int print_contents; int buffer_output; + int all_objects; const char *format; }; @@ -257,7 +258,7 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt, struct strbuf buf = STRBUF_INIT; if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { - printf("%s missing\n", obj_name); + printf("%s missing\n", obj_name ? 
obj_name : sha1_to_hex(data->sha1)); fflush(stdout); return; } @@ -318,6 +319,34 @@ static void batch_one_object(const char *obj_name, struct batch_options *opt, batch_object_write(obj_name, opt, data); } +struct object_cb_data { + struct batch_options *opt; + struct expand_data *expand; +}; + +static int batch_object_cb(const unsigned char *sha1, + struct object_cb_data *data) +{ + hashcpy(data->expand->sha1, sha1); + batch_object_write(NULL, data->opt, data->expand); + return 0; +} + +static int batch_loose_object(const unsigned char *sha1, + const char *path, + void *data) +{ + return batch_object_cb(sha1, data); +} + +static int batch_packed_object(const unsigned char *sha1, + struct packed_git *pack, + uint32_t pos, + void *data) +{ + return batch_object_cb(sha1, data); +} + static int batch_objects(struct batch_options *opt) { struct strbuf buf = STRBUF_INIT; @@ -345,6 +374,15 @@ static int batch_objects(struct batch_options *opt) if (opt->print_contents) data.info.typep = &data.type; + if (opt->all_objects) { + struct object_cb_data cb; + cb.opt = opt; + cb.expand = &data; + for_each_loose_object(batch_loose_object, &cb, 0); + for_each_packed_object(batch_packed_object, &cb, 0); + return 0; + } + /* * We are going to call get_sha1 on a potentially very large number of * objects. In most large cases, these will be actual object sha1s. 
The @@ -436,6 +474,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) PARSE_OPT_OPTARG, batch_option_callback }, OPT_BOOL(0, "follow-symlinks", &batch.follow_symlinks, N_("follow in-tree symlinks (used with --batch or --batch-check)")), + OPT_BOOL(0, "batch-all-objects", &batch.all_objects, + N_("show all objects with --batch or --batch-check")), OPT_END() }; @@ -460,7 +500,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix) usage_with_options(cat_file_usage, options); } - if (batch.follow_symlinks && !batch.enabled) { + if ((batch.follow_symlinks || batch.all_objects) && !batch.enabled) { usage_with_options(cat_file_usage, options); } -- cgit v1.2.3 From 3115ee45c8c7c0b753663890b13ec0e14fe4c0d7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 22 Jun 2015 07:06:32 -0400 Subject: cat-file: sort and de-dup output of --batch-all-objects The sorting we could probably live without, but printing duplicates is just a hassle for the user, who must then de-dup themselves (or risk a wrong answer if they are doing something like counting objects with a particular property). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 95604c4a63..07baad1e59 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -9,6 +9,7 @@ #include "userdiff.h" #include "streaming.h" #include "tree-walk.h" +#include "sha1-array.h" struct batch_options { int enabled; @@ -324,19 +325,19 @@ struct object_cb_data { struct expand_data *expand; }; -static int batch_object_cb(const unsigned char *sha1, - struct object_cb_data *data) +static void batch_object_cb(const unsigned char sha1[20], void *vdata) { + struct object_cb_data *data = vdata; hashcpy(data->expand->sha1, sha1); batch_object_write(NULL, data->opt, data->expand); - return 0; } static int batch_loose_object(const unsigned char *sha1, const char *path, void *data) { - return batch_object_cb(sha1, data); + sha1_array_append(data, sha1); + return 0; } static int batch_packed_object(const unsigned char *sha1, @@ -344,7 +345,8 @@ static int batch_packed_object(const unsigned char *sha1, uint32_t pos, void *data) { - return batch_object_cb(sha1, data); + sha1_array_append(data, sha1); + return 0; } static int batch_objects(struct batch_options *opt) @@ -375,11 +377,17 @@ static int batch_objects(struct batch_options *opt) data.info.typep = &data.type; if (opt->all_objects) { + struct sha1_array sa = SHA1_ARRAY_INIT; struct object_cb_data cb; + + for_each_loose_object(batch_loose_object, &sa, 0); + for_each_packed_object(batch_packed_object, &sa, 0); + cb.opt = opt; cb.expand = &data; - for_each_loose_object(batch_loose_object, &cb, 0); - for_each_packed_object(batch_packed_object, &cb, 0); + sha1_array_for_each_unique(&sa, batch_object_cb, &cb); + + sha1_array_clear(&sa); return 0; } -- cgit v1.2.3