summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2018-08-20 11:33:52 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2018-08-20 11:33:52 -0700
commit0c54cdaf6580f121919048633e85772d60b8fb17 (patch)
treee2c0985b9c692678a0f9972047ae837b86d59879
parentMerge branch 'ab/fetch-tags-noclobber' (diff)
parentfor_each_*_object: move declarations to object-store.h (diff)
downloadtgif-0c54cdaf6580f121919048633e85772d60b8fb17.tar.xz
Merge branch 'jk/for-each-object-iteration'
The API to iterate over all objects learned to optionally list objects in the order they appear in packfiles, which helps locality of access if the caller accesses these objects while as objects are enumerated. * jk/for-each-object-iteration: for_each_*_object: move declarations to object-store.h cat-file: use a single strbuf for all output cat-file: split batch "buf" into two variables cat-file: use oidset check-and-insert cat-file: support "unordered" output for --batch-all-objects cat-file: rename batch_{loose,packed}_object callbacks t1006: test cat-file --batch-all-objects with duplicates for_each_packed_object: support iterating in pack-order for_each_*_object: give more comprehensive docstrings for_each_*_object: take flag arguments as enum for_each_*_object: store flag definitions in a single location
-rw-r--r--Documentation/git-cat-file.txt10
-rw-r--r--builtin/cat-file.c109
-rw-r--r--builtin/prune-packed.c1
-rw-r--r--cache.h56
-rw-r--r--commit-graph.c2
-rw-r--r--object-store.h90
-rw-r--r--packfile.c24
-rw-r--r--packfile.h17
-rw-r--r--sha1-file.c3
-rwxr-xr-xt/t1006-cat-file.sh17
10 files changed, 218 insertions, 111 deletions
diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index f90f09b03f..74013335a1 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -104,6 +104,16 @@ OPTIONS
buffering; this is much more efficient when invoking
`--batch-check` on a large number of objects.
+--unordered::
+ When `--batch-all-objects` is in use, visit objects in an
+ order which may be more efficient for accessing the object
+ contents than hash order. The exact details of the order are
+ unspecified, but if you do not require a specific order, this
+ should generally result in faster output, especially with
+ `--batch`. Note that `cat-file` will still show each object
+ only once, even if it is stored multiple times in the
+ repository.
+
--allow-unknown-type::
Allow -s or -t to query broken/corrupt objects of unknown type.
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 4a44b2404f..08dced2614 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -21,6 +21,7 @@ struct batch_options {
int print_contents;
int buffer_output;
int all_objects;
+ int unordered;
int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
const char *format;
};
@@ -337,11 +338,11 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
}
}
-static void batch_object_write(const char *obj_name, struct batch_options *opt,
+static void batch_object_write(const char *obj_name,
+ struct strbuf *scratch,
+ struct batch_options *opt,
struct expand_data *data)
{
- struct strbuf buf = STRBUF_INIT;
-
if (!data->skip_object_info &&
oid_object_info_extended(the_repository, &data->oid, &data->info,
OBJECT_INFO_LOOKUP_REPLACE) < 0) {
@@ -351,10 +352,10 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt,
return;
}
- strbuf_expand(&buf, opt->format, expand_format, data);
- strbuf_addch(&buf, '\n');
- batch_write(opt, buf.buf, buf.len);
- strbuf_release(&buf);
+ strbuf_reset(scratch);
+ strbuf_expand(scratch, opt->format, expand_format, data);
+ strbuf_addch(scratch, '\n');
+ batch_write(opt, scratch->buf, scratch->len);
if (opt->print_contents) {
print_object_or_die(opt, data);
@@ -362,7 +363,9 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt,
}
}
-static void batch_one_object(const char *obj_name, struct batch_options *opt,
+static void batch_one_object(const char *obj_name,
+ struct strbuf *scratch,
+ struct batch_options *opt,
struct expand_data *data)
{
struct object_context ctx;
@@ -404,42 +407,70 @@ static void batch_one_object(const char *obj_name, struct batch_options *opt,
return;
}
- batch_object_write(obj_name, opt, data);
+ batch_object_write(obj_name, scratch, opt, data);
}
struct object_cb_data {
struct batch_options *opt;
struct expand_data *expand;
+ struct oidset *seen;
+ struct strbuf *scratch;
};
static int batch_object_cb(const struct object_id *oid, void *vdata)
{
struct object_cb_data *data = vdata;
oidcpy(&data->expand->oid, oid);
- batch_object_write(NULL, data->opt, data->expand);
+ batch_object_write(NULL, data->scratch, data->opt, data->expand);
return 0;
}
-static int batch_loose_object(const struct object_id *oid,
- const char *path,
- void *data)
+static int collect_loose_object(const struct object_id *oid,
+ const char *path,
+ void *data)
{
oid_array_append(data, oid);
return 0;
}
-static int batch_packed_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
- void *data)
+static int collect_packed_object(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data)
{
oid_array_append(data, oid);
return 0;
}
+static int batch_unordered_object(const struct object_id *oid, void *vdata)
+{
+ struct object_cb_data *data = vdata;
+
+ if (oidset_insert(data->seen, oid))
+ return 0;
+
+ return batch_object_cb(oid, data);
+}
+
+static int batch_unordered_loose(const struct object_id *oid,
+ const char *path,
+ void *data)
+{
+ return batch_unordered_object(oid, data);
+}
+
+static int batch_unordered_packed(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data)
+{
+ return batch_unordered_object(oid, data);
+}
+
static int batch_objects(struct batch_options *opt)
{
- struct strbuf buf = STRBUF_INIT;
+ struct strbuf input = STRBUF_INIT;
+ struct strbuf output = STRBUF_INIT;
struct expand_data data;
int save_warning;
int retval = 0;
@@ -454,8 +485,9 @@ static int batch_objects(struct batch_options *opt)
*/
memset(&data, 0, sizeof(data));
data.mark_query = 1;
- strbuf_expand(&buf, opt->format, expand_format, &data);
+ strbuf_expand(&output, opt->format, expand_format, &data);
data.mark_query = 0;
+ strbuf_release(&output);
if (opt->cmdmode)
data.split_on_whitespace = 1;
@@ -473,19 +505,37 @@ static int batch_objects(struct batch_options *opt)
data.info.typep = &data.type;
if (opt->all_objects) {
- struct oid_array sa = OID_ARRAY_INIT;
struct object_cb_data cb;
- for_each_loose_object(batch_loose_object, &sa, 0);
- for_each_packed_object(batch_packed_object, &sa, 0);
if (repository_format_partial_clone)
warning("This repository has extensions.partialClone set. Some objects may not be loaded.");
cb.opt = opt;
cb.expand = &data;
- oid_array_for_each_unique(&sa, batch_object_cb, &cb);
+ cb.scratch = &output;
+
+ if (opt->unordered) {
+ struct oidset seen = OIDSET_INIT;
+
+ cb.seen = &seen;
+
+ for_each_loose_object(batch_unordered_loose, &cb, 0);
+ for_each_packed_object(batch_unordered_packed, &cb,
+ FOR_EACH_OBJECT_PACK_ORDER);
+
+ oidset_clear(&seen);
+ } else {
+ struct oid_array sa = OID_ARRAY_INIT;
+
+ for_each_loose_object(collect_loose_object, &sa, 0);
+ for_each_packed_object(collect_packed_object, &sa, 0);
+
+ oid_array_for_each_unique(&sa, batch_object_cb, &cb);
+
+ oid_array_clear(&sa);
+ }
- oid_array_clear(&sa);
+ strbuf_release(&output);
return 0;
}
@@ -499,14 +549,14 @@ static int batch_objects(struct batch_options *opt)
save_warning = warn_on_object_refname_ambiguity;
warn_on_object_refname_ambiguity = 0;
- while (strbuf_getline(&buf, stdin) != EOF) {
+ while (strbuf_getline(&input, stdin) != EOF) {
if (data.split_on_whitespace) {
/*
* Split at first whitespace, tying off the beginning
* of the string and saving the remainder (or NULL) in
* data.rest.
*/
- char *p = strpbrk(buf.buf, " \t");
+ char *p = strpbrk(input.buf, " \t");
if (p) {
while (*p && strchr(" \t", *p))
*p++ = '\0';
@@ -514,10 +564,11 @@ static int batch_objects(struct batch_options *opt)
data.rest = p;
}
- batch_one_object(buf.buf, opt, &data);
+ batch_one_object(input.buf, &output, opt, &data);
}
- strbuf_release(&buf);
+ strbuf_release(&input);
+ strbuf_release(&output);
warn_on_object_refname_ambiguity = save_warning;
return retval;
}
@@ -586,6 +637,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
N_("follow in-tree symlinks (used with --batch or --batch-check)")),
OPT_BOOL(0, "batch-all-objects", &batch.all_objects,
N_("show all objects with --batch or --batch-check")),
+ OPT_BOOL(0, "unordered", &batch.unordered,
+ N_("do not order --batch-all-objects output")),
OPT_END()
};
diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c
index 4ff525e50f..a9e7b552b9 100644
--- a/builtin/prune-packed.c
+++ b/builtin/prune-packed.c
@@ -3,6 +3,7 @@
#include "progress.h"
#include "parse-options.h"
#include "packfile.h"
+#include "object-store.h"
static const char * const prune_packed_usage[] = {
N_("git prune-packed [-n | --dry-run] [-q | --quiet]"),
diff --git a/cache.h b/cache.h
index 1398b2a4e4..66d3b91cdb 100644
--- a/cache.h
+++ b/cache.h
@@ -1576,62 +1576,6 @@ extern int odb_mkstemp(struct strbuf *temp_filename, const char *pattern);
extern int odb_pack_keep(const char *name);
/*
- * Iterate over the files in the loose-object parts of the object
- * directory "path", triggering the following callbacks:
- *
- * - loose_object is called for each loose object we find.
- *
- * - loose_cruft is called for any files that do not appear to be
- * loose objects. Note that we only look in the loose object
- * directories "objects/[0-9a-f]{2}/", so we will not report
- * "objects/foobar" as cruft.
- *
- * - loose_subdir is called for each top-level hashed subdirectory
- * of the object directory (e.g., "$OBJDIR/f0"). It is called
- * after the objects in the directory are processed.
- *
- * Any callback that is NULL will be ignored. Callbacks returning non-zero
- * will end the iteration.
- *
- * In the "buf" variant, "path" is a strbuf which will also be used as a
- * scratch buffer, but restored to its original contents before
- * the function returns.
- */
-typedef int each_loose_object_fn(const struct object_id *oid,
- const char *path,
- void *data);
-typedef int each_loose_cruft_fn(const char *basename,
- const char *path,
- void *data);
-typedef int each_loose_subdir_fn(unsigned int nr,
- const char *path,
- void *data);
-int for_each_file_in_obj_subdir(unsigned int subdir_nr,
- struct strbuf *path,
- each_loose_object_fn obj_cb,
- each_loose_cruft_fn cruft_cb,
- each_loose_subdir_fn subdir_cb,
- void *data);
-int for_each_loose_file_in_objdir(const char *path,
- each_loose_object_fn obj_cb,
- each_loose_cruft_fn cruft_cb,
- each_loose_subdir_fn subdir_cb,
- void *data);
-int for_each_loose_file_in_objdir_buf(struct strbuf *path,
- each_loose_object_fn obj_cb,
- each_loose_cruft_fn cruft_cb,
- each_loose_subdir_fn subdir_cb,
- void *data);
-
-/*
- * Iterate over loose objects in both the local
- * repository and any alternates repositories (unless the
- * LOCAL_ONLY flag is set).
- */
-#define FOR_EACH_OBJECT_LOCAL_ONLY 0x1
-extern int for_each_loose_object(each_loose_object_fn, void *, unsigned flags);
-
-/*
* Set this to 0 to prevent sha1_object_info_extended() from fetching missing
* blobs. This has a difference only if extensions.partialClone is set.
*
diff --git a/commit-graph.c b/commit-graph.c
index 0034740c26..8a1bec7b8a 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -730,7 +730,7 @@ void write_commit_graph(const char *obj_dir,
die(_("error adding pack %s"), packname.buf);
if (open_pack_index(p))
die(_("error opening index for %s"), packname.buf);
- for_each_object_in_pack(p, add_packed_commits, &oids);
+ for_each_object_in_pack(p, add_packed_commits, &oids, 0);
close_pack(p);
}
strbuf_release(&packname);
diff --git a/object-store.h b/object-store.h
index e481f7ad41..162156f5dc 100644
--- a/object-store.h
+++ b/object-store.h
@@ -262,4 +262,94 @@ int oid_object_info_extended(struct repository *r,
const struct object_id *,
struct object_info *, unsigned flags);
+/*
+ * Iterate over the files in the loose-object parts of the object
+ * directory "path", triggering the following callbacks:
+ *
+ * - loose_object is called for each loose object we find.
+ *
+ * - loose_cruft is called for any files that do not appear to be
+ * loose objects. Note that we only look in the loose object
+ * directories "objects/[0-9a-f]{2}/", so we will not report
+ * "objects/foobar" as cruft.
+ *
+ * - loose_subdir is called for each top-level hashed subdirectory
+ * of the object directory (e.g., "$OBJDIR/f0"). It is called
+ * after the objects in the directory are processed.
+ *
+ * Any callback that is NULL will be ignored. Callbacks returning non-zero
+ * will end the iteration.
+ *
+ * In the "buf" variant, "path" is a strbuf which will also be used as a
+ * scratch buffer, but restored to its original contents before
+ * the function returns.
+ */
+typedef int each_loose_object_fn(const struct object_id *oid,
+ const char *path,
+ void *data);
+typedef int each_loose_cruft_fn(const char *basename,
+ const char *path,
+ void *data);
+typedef int each_loose_subdir_fn(unsigned int nr,
+ const char *path,
+ void *data);
+int for_each_file_in_obj_subdir(unsigned int subdir_nr,
+ struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+int for_each_loose_file_in_objdir(const char *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+int for_each_loose_file_in_objdir_buf(struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+
+/* Flags for for_each_*_object() below. */
+enum for_each_object_flags {
+ /* Iterate only over local objects, not alternates. */
+ FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+
+ /* Only iterate over packs obtained from the promisor remote. */
+ FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+
+ /*
+ * Visit objects within a pack in packfile order rather than .idx order
+ */
+ FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+};
+
+/*
+ * Iterate over all accessible loose objects without respect to
+ * reachability. By default, this includes both local and alternate objects.
+ * The order in which objects are visited is unspecified.
+ *
+ * Any flags specific to packs are ignored.
+ */
+int for_each_loose_object(each_loose_object_fn, void *,
+ enum for_each_object_flags flags);
+
+/*
+ * Iterate over all accessible packed objects without respect to reachability.
+ * By default, this includes both local and alternate packs.
+ *
+ * Note that some objects may appear twice if they are found in multiple packs.
+ * Each pack is visited in an unspecified order. By default, objects within a
+ * pack are visited in pack-idx order (i.e., sorted by oid).
+ */
+typedef int each_packed_object_fn(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data);
+int for_each_object_in_pack(struct packed_git *p,
+ each_packed_object_fn, void *data,
+ enum for_each_object_flags flags);
+int for_each_packed_object(each_packed_object_fn, void *,
+ enum for_each_object_flags flags);
+
#endif /* OBJECT_STORE_H */
diff --git a/packfile.c b/packfile.c
index 6974903e58..ebcb5742ec 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1885,26 +1885,38 @@ int has_pack_index(const unsigned char *sha1)
return 1;
}
-int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
+int for_each_object_in_pack(struct packed_git *p,
+ each_packed_object_fn cb, void *data,
+ enum for_each_object_flags flags)
{
uint32_t i;
int r = 0;
+ if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ load_pack_revindex(p);
+
for (i = 0; i < p->num_objects; i++) {
+ uint32_t pos;
struct object_id oid;
- if (!nth_packed_object_oid(&oid, p, i))
+ if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ pos = p->revindex[i].nr;
+ else
+ pos = i;
+
+ if (!nth_packed_object_oid(&oid, p, pos))
return error("unable to get sha1 of object %u in %s",
- i, p->pack_name);
+ pos, p->pack_name);
- r = cb(&oid, p, i, data);
+ r = cb(&oid, p, pos, data);
if (r)
break;
}
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
+int for_each_packed_object(each_packed_object_fn cb, void *data,
+ enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
@@ -1921,7 +1933,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
pack_errors = 1;
continue;
}
- r = for_each_object_in_pack(p, cb, data);
+ r = for_each_object_in_pack(p, cb, data, flags);
if (r)
break;
}
diff --git a/packfile.h b/packfile.h
index fa36c473ad..630f35cf31 100644
--- a/packfile.h
+++ b/packfile.h
@@ -149,23 +149,6 @@ extern int has_object_pack(const struct object_id *oid);
extern int has_pack_index(const unsigned char *sha1);
/*
- * Only iterate over packs obtained from the promisor remote.
- */
-#define FOR_EACH_OBJECT_PROMISOR_ONLY 2
-
-/*
- * Iterate over packed objects in both the local
- * repository and any alternates repositories (unless the
- * FOR_EACH_OBJECT_LOCAL_ONLY flag, defined in cache.h, is set).
- */
-typedef int each_packed_object_fn(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
- void *data);
-extern int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data);
-extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags);
-
-/*
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
diff --git a/sha1-file.c b/sha1-file.c
index c6ca960eb2..56e5329caf 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -2146,7 +2146,8 @@ static int loose_from_alt_odb(struct alternate_object_database *alt,
return r;
}
-int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
+int for_each_loose_object(each_loose_object_fn cb, void *data,
+ enum for_each_object_flags flags)
{
struct loose_alt_odb_data alt;
int r;
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 13dd510b2e..7f19d591f2 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -550,8 +550,8 @@ test_expect_success 'git cat-file --batch --follow-symlink returns correct sha a
test_expect_success 'cat-file --batch-all-objects shows all objects' '
# make new repos so we know the full set of objects; we will
# also make sure that there are some packed and some loose
- # objects, some referenced and some not, and that there are
- # some available only via alternates.
+ # objects, some referenced and some not, some duplicates, and that
+ # there are some available only via alternates.
git init all-one &&
(
cd all-one &&
@@ -567,10 +567,23 @@ test_expect_success 'cat-file --batch-all-objects shows all objects' '
cd all-two &&
echo local-unref | git hash-object -w --stdin
) >>expect.unsorted &&
+ git -C all-two rev-parse HEAD:file |
+ git -C all-two pack-objects .git/objects/pack/pack &&
sort <expect.unsorted >expect &&
git -C all-two cat-file --batch-all-objects \
--batch-check="%(objectname)" >actual &&
test_cmp expect actual
'
+# The only user-visible difference is that the objects are no longer sorted,
+# and the resulting sort order is undefined. So we can only check that it
+# produces the same objects as the ordered case, but that at least exercises
+# the code.
+test_expect_success 'cat-file --unordered works' '
+ git -C all-two cat-file --batch-all-objects --unordered \
+ --batch-check="%(objectname)" >actual.unsorted &&
+ sort <actual.unsorted >actual &&
+ test_cmp expect actual
+'
+
test_done