summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2017-12-27 11:16:20 -0800
committerLibravatar Junio C Hamano <gitster@pobox.com>2017-12-27 11:16:21 -0800
commit61061abba7d60f555e97a22ab5775a9d53db1660 (patch)
tree85b38a1b4e668ce6d19e2b6a35e75cc04b77f833
parentRelNotes: the tenth batch (diff)
parentrev-list: support --no-filter argument (diff)
downloadtgif-61061abba7d60f555e97a22ab5775a9d53db1660.tar.xz
Merge branch 'jh/object-filtering'
In preparation for implementing narrow/partial clone, the object walking machinery has been taught a way to tell it to "filter" some objects from enumeration. * jh/object-filtering: rev-list: support --no-filter argument list-objects-filter-options: support --no-filter list-objects-filter-options: fix 'keword' typo in comment pack-objects: add list-objects filtering rev-list: add list-objects filtering support list-objects: filter objects in traverse_commit_list oidset: add iterator methods to oidset oidmap: add oidmap iterator methods dir: allow exclusions from blob in addition to file
-rw-r--r--Documentation/git-pack-objects.txt22
-rw-r--r--Documentation/git-rev-list.txt4
-rw-r--r--Documentation/rev-list-options.txt41
-rw-r--r--Makefile2
-rw-r--r--builtin/pack-objects.c64
-rw-r--r--builtin/rev-list.c112
-rw-r--r--dir.c132
-rw-r--r--dir.h3
-rw-r--r--list-objects-filter-options.c92
-rw-r--r--list-objects-filter-options.h61
-rw-r--r--list-objects-filter.c401
-rw-r--r--list-objects-filter.h77
-rw-r--r--list-objects.c95
-rw-r--r--list-objects.h13
-rw-r--r--object.h1
-rw-r--r--oidmap.h22
-rw-r--r--oidset.c10
-rw-r--r--oidset.h36
-rwxr-xr-xt/t5317-pack-objects-filter-objects.sh375
-rwxr-xr-xt/t6112-rev-list-filters-objects.sh225
20 files changed, 1735 insertions, 53 deletions
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index 473a16135a..aa403d02f3 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -12,7 +12,8 @@ SYNOPSIS
'git pack-objects' [-q | --progress | --all-progress] [--all-progress-implied]
[--no-reuse-delta] [--delta-base-offset] [--non-empty]
[--local] [--incremental] [--window=<n>] [--depth=<n>]
- [--revs [--unpacked | --all]] [--stdout | base-name]
+ [--revs [--unpacked | --all]]
+ [--stdout [--filter=<filter-spec>] | base-name]
[--shallow] [--keep-true-parents] < object-list
@@ -236,6 +237,25 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle.
With this option, parents that are hidden by grafts are packed
nevertheless.
+--filter=<filter-spec>::
+ Requires `--stdout`. Omits certain objects (usually blobs) from
+ the resulting packfile. See linkgit:git-rev-list[1] for valid
+ `<filter-spec>` forms.
+
+--no-filter::
+ Turns off any previous `--filter=` argument.
+
+--missing=<missing-action>::
+ A debug option to help with future "partial clone" development.
+ This option specifies how missing objects are handled.
++
+The form '--missing=error' requests that pack-objects stop with an error if
+a missing object is encountered. This is the default action.
++
+The form '--missing=allow-any' will allow object traversal to continue
+if a missing object is encountered. Missing objects will silently be
+omitted from the results.
+
SEE ALSO
--------
linkgit:git-rev-list[1]
diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt
index ef22f1775b..88609ff435 100644
--- a/Documentation/git-rev-list.txt
+++ b/Documentation/git-rev-list.txt
@@ -47,7 +47,9 @@ SYNOPSIS
[ --fixed-strings | -F ]
[ --date=<format>]
[ [ --objects | --objects-edge | --objects-edge-aggressive ]
- [ --unpacked ] ]
+ [ --unpacked ]
+ [ --filter=<filter-spec> [ --filter-print-omitted ] ] ]
+ [ --missing=<missing-action> ]
[ --pretty | --header ]
[ --bisect ]
[ --bisect-vars ]
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 13501e1556..8d8b7f492a 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -706,6 +706,47 @@ ifdef::git-rev-list[]
--unpacked::
Only useful with `--objects`; print the object IDs that are not
in packs.
+
+--filter=<filter-spec>::
+ Only useful with one of the `--objects*`; omits objects (usually
+ blobs) from the list of printed objects. The '<filter-spec>'
+ may be one of the following:
++
+The form '--filter=blob:none' omits all blobs.
++
+The form '--filter=blob:limit=<n>[kmg]' omits blobs of size at least n
+bytes or units. n may be zero. The suffixes k, m, and g can be used to name
+units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same
+as 'blob:limit=1024'.
++
+The form '--filter=sparse:oid=<blob-ish>' uses a sparse-checkout
+specification contained in the blob (or blob-expression) '<blob-ish>'
+to omit blobs that would not be required for a sparse checkout on
+the requested refs.
++
+The form '--filter=sparse:path=<path>' similarly uses a sparse-checkout
+specification contained in <path>.
+
+--no-filter::
+ Turn off any previous `--filter=` argument.
+
+--filter-print-omitted::
+ Only useful with `--filter=`; prints a list of the objects omitted
+ by the filter. Object IDs are prefixed with a ``~'' character.
+
+--missing=<missing-action>::
+ A debug option to help with future "partial clone" development.
+ This option specifies how missing objects are handled.
++
+The form '--missing=error' requests that rev-list stop with an error if
+a missing object is encountered. This is the default action.
++
+The form '--missing=allow-any' will allow object traversal to continue
+if a missing object is encountered. Missing objects will silently be
+omitted from the results.
++
+The form '--missing=print' is like 'allow-any', but will also print a
+list of the missing objects. Object IDs are prefixed with a ``?'' character.
endif::git-rev-list[]
--no-walk[=(sorted|unsorted)]::
diff --git a/Makefile b/Makefile
index 9dc5a588e2..80e0674d6f 100644
--- a/Makefile
+++ b/Makefile
@@ -811,6 +811,8 @@ LIB_OBJS += levenshtein.o
LIB_OBJS += line-log.o
LIB_OBJS += line-range.o
LIB_OBJS += list-objects.o
+LIB_OBJS += list-objects-filter.o
+LIB_OBJS += list-objects-filter-options.o
LIB_OBJS += ll-merge.o
LIB_OBJS += lockfile.o
LIB_OBJS += log-tree.o
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 631de28761..6b9cfc289d 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -15,6 +15,8 @@
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
+#include "list-objects-filter.h"
+#include "list-objects-filter-options.h"
#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
@@ -79,6 +81,15 @@ static unsigned long cache_max_small_delta_size = 1000;
static unsigned long window_memory_limit = 0;
+static struct list_objects_filter_options filter_options;
+
+enum missing_action {
+ MA_ERROR = 0, /* fail if any missing objects are encountered */
+ MA_ALLOW_ANY, /* silently allow ALL missing objects */
+};
+static enum missing_action arg_missing_action;
+static show_object_fn fn_show_object;
+
/*
* stats
*/
@@ -2553,6 +2564,42 @@ static void show_object(struct object *obj, const char *name, void *data)
obj->flags |= OBJECT_ADDED;
}
+static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
+{
+ assert(arg_missing_action == MA_ALLOW_ANY);
+
+ /*
+ * Quietly ignore ALL missing objects. This avoids problems with
+ * staging them now and getting an odd error later.
+ */
+ if (!has_object_file(&obj->oid))
+ return;
+
+ show_object(obj, name, data);
+}
+
+static int option_parse_missing_action(const struct option *opt,
+ const char *arg, int unset)
+{
+ assert(arg);
+ assert(!unset);
+
+ if (!strcmp(arg, "error")) {
+ arg_missing_action = MA_ERROR;
+ fn_show_object = show_object;
+ return 0;
+ }
+
+ if (!strcmp(arg, "allow-any")) {
+ arg_missing_action = MA_ALLOW_ANY;
+ fn_show_object = show_object__ma_allow_any;
+ return 0;
+ }
+
+ die(_("invalid value for --missing"));
+ return 0;
+}
+
static void show_edge(struct commit *commit)
{
add_preferred_base(&commit->object.oid);
@@ -2817,7 +2864,12 @@ static void get_object_list(int ac, const char **av)
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
mark_edges_uninteresting(&revs, show_edge);
- traverse_commit_list(&revs, show_commit, show_object, NULL);
+
+ if (!fn_show_object)
+ fn_show_object = show_object;
+ traverse_commit_list_filtered(&filter_options, &revs,
+ show_commit, fn_show_object, NULL,
+ NULL);
if (unpack_unreachable_expiration) {
revs.ignore_missing_links = 1;
@@ -2953,6 +3005,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
N_("use a bitmap index if available to speed up counting objects")),
OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index,
N_("write a bitmap index together with the pack index")),
+ OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
+ { OPTION_CALLBACK, 0, "missing", NULL, N_("action"),
+ N_("handling for missing objects"), PARSE_OPT_NONEG,
+ option_parse_missing_action },
OPT_END(),
};
@@ -3029,6 +3085,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
unpack_unreachable_expiration = 0;
+ if (filter_options.choice) {
+ if (!pack_to_stdout)
+ die("cannot use --filter without --stdout.");
+ use_bitmap_index = 0;
+ }
+
/*
* "soft" reasons not to use bitmaps - for on-disk repack by default we want
*
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 4032eb3811..d5345b6a2e 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -4,6 +4,8 @@
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
+#include "list-objects-filter.h"
+#include "list-objects-filter-options.h"
#include "pack.h"
#include "pack-bitmap.h"
#include "builtin.h"
@@ -12,6 +14,7 @@
#include "bisect.h"
#include "progress.h"
#include "reflog-walk.h"
+#include "oidset.h"
static const char rev_list_usage[] =
"git rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
@@ -55,6 +58,20 @@ static const char rev_list_usage[] =
static struct progress *progress;
static unsigned progress_counter;
+static struct list_objects_filter_options filter_options;
+static struct oidset omitted_objects;
+static int arg_print_omitted; /* print objects omitted by filter */
+
+static struct oidset missing_objects;
+enum missing_action {
+ MA_ERROR = 0, /* fail if any missing objects are encountered */
+ MA_ALLOW_ANY, /* silently allow ALL missing objects */
+ MA_PRINT, /* print ALL missing objects in special section */
+};
+static enum missing_action arg_missing_action;
+
+#define DEFAULT_OIDSET_SIZE (16*1024)
+
static void finish_commit(struct commit *commit, void *data);
static void show_commit(struct commit *commit, void *data)
{
@@ -178,11 +195,31 @@ static void finish_commit(struct commit *commit, void *data)
free_commit_buffer(commit);
}
+static inline void finish_object__ma(struct object *obj)
+{
+ switch (arg_missing_action) {
+ case MA_ERROR:
+ die("missing blob object '%s'", oid_to_hex(&obj->oid));
+ return;
+
+ case MA_ALLOW_ANY:
+ return;
+
+ case MA_PRINT:
+ oidset_insert(&missing_objects, &obj->oid);
+ return;
+
+ default:
+ BUG("unhandled missing_action");
+ return;
+ }
+}
+
static void finish_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid))
- die("missing blob object '%s'", oid_to_hex(&obj->oid));
+ finish_object__ma(obj);
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
parse_object(&obj->oid);
}
@@ -269,6 +306,26 @@ static int show_object_fast(
return 1;
}
+static inline int parse_missing_action_value(const char *value)
+{
+ if (!strcmp(value, "error")) {
+ arg_missing_action = MA_ERROR;
+ return 1;
+ }
+
+ if (!strcmp(value, "allow-any")) {
+ arg_missing_action = MA_ALLOW_ANY;
+ return 1;
+ }
+
+ if (!strcmp(value, "print")) {
+ arg_missing_action = MA_PRINT;
+ return 1;
+ }
+
+ return 0;
+}
+
int cmd_rev_list(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
@@ -335,6 +392,30 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
show_progress = arg;
continue;
}
+
+ if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
+ parse_list_objects_filter(&filter_options, arg);
+ if (filter_options.choice && !revs.blob_objects)
+ die(_("object filtering requires --objects"));
+ if (filter_options.choice == LOFC_SPARSE_OID &&
+ !filter_options.sparse_oid_value)
+ die(_("invalid sparse value '%s'"),
+ filter_options.filter_spec);
+ continue;
+ }
+ if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
+ list_objects_filter_release(&filter_options);
+ continue;
+ }
+ if (!strcmp(arg, "--filter-print-omitted")) {
+ arg_print_omitted = 1;
+ continue;
+ }
+
+ if (skip_prefix(arg, "--missing=", &arg) &&
+ parse_missing_action_value(arg))
+ continue;
+
usage(rev_list_usage);
}
@@ -360,6 +441,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
if (revs.show_notes)
die(_("rev-list does not support display of notes"));
+ if (filter_options.choice && use_bitmap_index)
+ die(_("cannot combine --use-bitmap-index with object filtering"));
+
save_commit_buffer = (revs.verbose_header ||
revs.grep_filter.pattern_list ||
revs.grep_filter.header_list);
@@ -403,7 +487,31 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
return show_bisect_vars(&info, reaches, all);
}
- traverse_commit_list(&revs, show_commit, show_object, &info);
+ if (arg_print_omitted)
+ oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
+ if (arg_missing_action == MA_PRINT)
+ oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
+
+ traverse_commit_list_filtered(
+ &filter_options, &revs, show_commit, show_object, &info,
+ (arg_print_omitted ? &omitted_objects : NULL));
+
+ if (arg_print_omitted) {
+ struct oidset_iter iter;
+ struct object_id *oid;
+ oidset_iter_init(&omitted_objects, &iter);
+ while ((oid = oidset_iter_next(&iter)))
+ printf("~%s\n", oid_to_hex(oid));
+ oidset_clear(&omitted_objects);
+ }
+ if (arg_missing_action == MA_PRINT) {
+ struct oidset_iter iter;
+ struct object_id *oid;
+ oidset_iter_init(&missing_objects, &iter);
+ while ((oid = oidset_iter_next(&iter)))
+ printf("?%s\n", oid_to_hex(oid));
+ oidset_clear(&missing_objects);
+ }
stop_progress(&progress);
diff --git a/dir.c b/dir.c
index 3c54366a17..7c4b45e30e 100644
--- a/dir.c
+++ b/dir.c
@@ -221,6 +221,57 @@ int within_depth(const char *name, int namelen,
return 1;
}
+/*
+ * Read the contents of the blob with the given OID into a buffer.
+ * Append a trailing LF to the end if the last line doesn't have one.
+ *
+ * Returns:
+ * -1 when the OID is invalid or unknown or does not refer to a blob.
+ * 0 when the blob is empty.
+ * 1 along with { data, size } of the (possibly augmented) buffer
+ * when successful.
+ *
+ * Optionally updates the given sha1_stat with the given OID (when valid).
+ */
+static int do_read_blob(const struct object_id *oid,
+ struct sha1_stat *sha1_stat,
+ size_t *size_out,
+ char **data_out)
+{
+ enum object_type type;
+ unsigned long sz;
+ char *data;
+
+ *size_out = 0;
+ *data_out = NULL;
+
+ data = read_sha1_file(oid->hash, &type, &sz);
+ if (!data || type != OBJ_BLOB) {
+ free(data);
+ return -1;
+ }
+
+ if (sha1_stat) {
+ memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat));
+ hashcpy(sha1_stat->sha1, oid->hash);
+ }
+
+ if (sz == 0) {
+ free(data);
+ return 0;
+ }
+
+ if (data[sz - 1] != '\n') {
+ data = xrealloc(data, st_add(sz, 1));
+ data[sz++] = '\n';
+ }
+
+ *size_out = xsize_t(sz);
+ *data_out = data;
+
+ return 1;
+}
+
#define DO_MATCH_EXCLUDE (1<<0)
#define DO_MATCH_DIRECTORY (1<<1)
#define DO_MATCH_SUBMODULE (1<<2)
@@ -601,32 +652,22 @@ void add_exclude(const char *string, const char *base,
x->el = el;
}
-static void *read_skip_worktree_file_from_index(const struct index_state *istate,
- const char *path, size_t *size,
- struct sha1_stat *sha1_stat)
+static int read_skip_worktree_file_from_index(const struct index_state *istate,
+ const char *path,
+ size_t *size_out,
+ char **data_out,
+ struct sha1_stat *sha1_stat)
{
int pos, len;
- unsigned long sz;
- enum object_type type;
- void *data;
len = strlen(path);
pos = index_name_pos(istate, path, len);
if (pos < 0)
- return NULL;
+ return -1;
if (!ce_skip_worktree(istate->cache[pos]))
- return NULL;
- data = read_sha1_file(istate->cache[pos]->oid.hash, &type, &sz);
- if (!data || type != OBJ_BLOB) {
- free(data);
- return NULL;
- }
- *size = xsize_t(sz);
- if (sha1_stat) {
- memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat));
- hashcpy(sha1_stat->sha1, istate->cache[pos]->oid.hash);
- }
- return data;
+ return -1;
+
+ return do_read_blob(&istate->cache[pos]->oid, sha1_stat, size_out, data_out);
}
/*
@@ -740,6 +781,10 @@ static void invalidate_directory(struct untracked_cache *uc,
dir->dirs[i]->recurse = 0;
}
+static int add_excludes_from_buffer(char *buf, size_t size,
+ const char *base, int baselen,
+ struct exclude_list *el);
+
/*
* Given a file with name "fname", read it (either from disk, or from
* an index if 'istate' is non-null), parse it and store the
@@ -755,9 +800,10 @@ static int add_excludes(const char *fname, const char *base, int baselen,
struct sha1_stat *sha1_stat)
{
struct stat st;
- int fd, i, lineno = 1;
+ int r;
+ int fd;
size_t size = 0;
- char *buf, *entry;
+ char *buf;
fd = open(fname, O_RDONLY);
if (fd < 0 || fstat(fd, &st) < 0) {
@@ -765,17 +811,13 @@ static int add_excludes(const char *fname, const char *base, int baselen,
warn_on_fopen_errors(fname);
else
close(fd);
- if (!istate ||
- (buf = read_skip_worktree_file_from_index(istate, fname, &size, sha1_stat)) == NULL)
+ if (!istate)
return -1;
- if (size == 0) {
- free(buf);
- return 0;
- }
- if (buf[size-1] != '\n') {
- buf = xrealloc(buf, st_add(size, 1));
- buf[size++] = '\n';
- }
+ r = read_skip_worktree_file_from_index(istate, fname,
+ &size, &buf,
+ sha1_stat);
+ if (r != 1)
+ return r;
} else {
size = xsize_t(st.st_size);
if (size == 0) {
@@ -814,6 +856,17 @@ static int add_excludes(const char *fname, const char *base, int baselen,
}
}
+ add_excludes_from_buffer(buf, size, base, baselen, el);
+ return 0;
+}
+
+static int add_excludes_from_buffer(char *buf, size_t size,
+ const char *base, int baselen,
+ struct exclude_list *el)
+{
+ int i, lineno = 1;
+ char *entry;
+
el->filebuf = buf;
if (skip_utf8_bom(&buf, size))
@@ -842,6 +895,23 @@ int add_excludes_from_file_to_list(const char *fname, const char *base,
return add_excludes(fname, base, baselen, el, istate, NULL);
}
+int add_excludes_from_blob_to_list(
+ struct object_id *oid,
+ const char *base, int baselen,
+ struct exclude_list *el)
+{
+ char *buf;
+ size_t size;
+ int r;
+
+ r = do_read_blob(oid, NULL, &size, &buf);
+ if (r != 1)
+ return r;
+
+ add_excludes_from_buffer(buf, size, base, baselen, el);
+ return 0;
+}
+
struct exclude_list *add_exclude_list(struct dir_struct *dir,
int group_type, const char *src)
{
diff --git a/dir.h b/dir.h
index 233a2eb36b..11a047ba48 100644
--- a/dir.h
+++ b/dir.h
@@ -259,6 +259,9 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir,
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
struct exclude_list *el, struct index_state *istate);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
+extern int add_excludes_from_blob_to_list(struct object_id *oid,
+ const char *base, int baselen,
+ struct exclude_list *el);
extern void parse_exclude_pattern(const char **string, int *patternlen, unsigned *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *el, int srcpos);
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
new file mode 100644
index 0000000000..4c5b34e949
--- /dev/null
+++ b/list-objects-filter-options.c
@@ -0,0 +1,92 @@
+#include "cache.h"
+#include "commit.h"
+#include "config.h"
+#include "revision.h"
+#include "argv-array.h"
+#include "list-objects.h"
+#include "list-objects-filter.h"
+#include "list-objects-filter-options.h"
+
+/*
+ * Parse value of the argument to the "filter" keyword.
+ * On the command line this looks like:
+ * --filter=<arg>
+ * and in the pack protocol as:
+ * "filter" SP <arg>
+ *
+ * The filter keyword will be used by many commands.
+ * See Documentation/rev-list-options.txt for allowed values for <arg>.
+ *
+ * Capture the given arg as the "filter_spec". This can be forwarded to
+ * subordinate commands when necessary. We also "intern" the arg for
+ * the convenience of the current command.
+ */
+int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
+ const char *arg)
+{
+ const char *v0;
+
+ if (filter_options->choice)
+ die(_("multiple object filter types cannot be combined"));
+
+ filter_options->filter_spec = strdup(arg);
+
+ if (!strcmp(arg, "blob:none")) {
+ filter_options->choice = LOFC_BLOB_NONE;
+ return 0;
+ }
+
+ if (skip_prefix(arg, "blob:limit=", &v0)) {
+ if (!git_parse_ulong(v0, &filter_options->blob_limit_value))
+ die(_("invalid filter-spec expression '%s'"), arg);
+ filter_options->choice = LOFC_BLOB_LIMIT;
+ return 0;
+ }
+
+ if (skip_prefix(arg, "sparse:oid=", &v0)) {
+ struct object_context oc;
+ struct object_id sparse_oid;
+
+ /*
+ * Try to parse <oid-expression> into an OID for the current
+ * command, but DO NOT complain if we don't have the blob or
+ * ref locally.
+ */
+ if (!get_oid_with_context(v0, GET_OID_BLOB,
+ &sparse_oid, &oc))
+ filter_options->sparse_oid_value = oiddup(&sparse_oid);
+ filter_options->choice = LOFC_SPARSE_OID;
+ return 0;
+ }
+
+ if (skip_prefix(arg, "sparse:path=", &v0)) {
+ filter_options->choice = LOFC_SPARSE_PATH;
+ filter_options->sparse_path_value = strdup(v0);
+ return 0;
+ }
+
+ die(_("invalid filter-spec expression '%s'"), arg);
+ return 0;
+}
+
+int opt_parse_list_objects_filter(const struct option *opt,
+ const char *arg, int unset)
+{
+ struct list_objects_filter_options *filter_options = opt->value;
+
+ if (unset || !arg) {
+ list_objects_filter_release(filter_options);
+ return 0;
+ }
+
+ return parse_list_objects_filter(filter_options, arg);
+}
+
+void list_objects_filter_release(
+ struct list_objects_filter_options *filter_options)
+{
+ free(filter_options->filter_spec);
+ free(filter_options->sparse_oid_value);
+ free(filter_options->sparse_path_value);
+ memset(filter_options, 0, sizeof(*filter_options));
+}
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
new file mode 100644
index 0000000000..eea44a1a51
--- /dev/null
+++ b/list-objects-filter-options.h
@@ -0,0 +1,61 @@
+#ifndef LIST_OBJECTS_FILTER_OPTIONS_H
+#define LIST_OBJECTS_FILTER_OPTIONS_H
+
+#include "parse-options.h"
+
+/*
+ * The list of defined filters for list-objects.
+ */
+enum list_objects_filter_choice {
+ LOFC_DISABLED = 0,
+ LOFC_BLOB_NONE,
+ LOFC_BLOB_LIMIT,
+ LOFC_SPARSE_OID,
+ LOFC_SPARSE_PATH,
+ LOFC__COUNT /* must be last */
+};
+
+struct list_objects_filter_options {
+ /*
+ * 'filter_spec' is the raw argument value given on the command line
+ * or protocol request. (The part after the "--keyword=".) For
+ * commands that launch filtering sub-processes, this value should be
+ * passed to them as received by the current process.
+ */
+ char *filter_spec;
+
+ /*
+ * 'choice' is determined by parsing the filter-spec. This indicates
+ * the filtering algorithm to use.
+ */
+ enum list_objects_filter_choice choice;
+
+ /*
+ * Parsed values (fields) from within the filter-spec. These are
+ * choice-specific; not all values will be defined for any given
+ * choice.
+ */
+ struct object_id *sparse_oid_value;
+ char *sparse_path_value;
+ unsigned long blob_limit_value;
+};
+
+/* Normalized command line arguments */
+#define CL_ARG__FILTER "filter"
+
+int parse_list_objects_filter(
+ struct list_objects_filter_options *filter_options,
+ const char *arg);
+
+int opt_parse_list_objects_filter(const struct option *opt,
+ const char *arg, int unset);
+
+#define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
+ { OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \
+ N_("object filtering"), 0, \
+ opt_parse_list_objects_filter }
+
+void list_objects_filter_release(
+ struct list_objects_filter_options *filter_options);
+
+#endif /* LIST_OBJECTS_FILTER_OPTIONS_H */
diff --git a/list-objects-filter.c b/list-objects-filter.c
new file mode 100644
index 0000000000..4356c45368
--- /dev/null
+++ b/list-objects-filter.c
@@ -0,0 +1,401 @@
+#include "cache.h"
+#include "dir.h"
+#include "tag.h"
+#include "commit.h"
+#include "tree.h"
+#include "blob.h"
+#include "diff.h"
+#include "tree-walk.h"
+#include "revision.h"
+#include "list-objects.h"
+#include "list-objects-filter.h"
+#include "list-objects-filter-options.h"
+#include "oidset.h"
+
+/* Remember to update object flag allocation in object.h */
+/*
+ * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
+ * that have been shown, but should be revisited if they appear
+ * in the traversal (until we mark it SEEN). This is a way to
+ * let us silently de-dup calls to show() in the caller. This
+ * is subtly different from the "revision.h:SHOWN" and the
+ * "sha1_name.c:ONELINE_SEEN" bits. And also different from
+ * the non-de-dup usage in pack-bitmap.c
+ */
+#define FILTER_SHOWN_BUT_REVISIT (1<<21)
+
+/*
+ * A filter for list-objects to omit ALL blobs from the traversal.
+ * And to OPTIONALLY collect a list of the omitted OIDs.
+ */
+struct filter_blobs_none_data {
+ struct oidset *omits;
+};
+
+static enum list_objects_filter_result filter_blobs_none(
+ enum list_objects_filter_situation filter_situation,
+ struct object *obj,
+ const char *pathname,
+ const char *filename,
+ void *filter_data_)
+{
+ struct filter_blobs_none_data *filter_data = filter_data_;
+
+ switch (filter_situation) {
+ default:
+ die("unknown filter_situation");
+ return LOFR_ZERO;
+
+ case LOFS_BEGIN_TREE:
+ assert(obj->type == OBJ_TREE);
+ /* always include all tree objects */
+ return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+
+ case LOFS_END_TREE:
+ assert(obj->type == OBJ_TREE);
+ return LOFR_ZERO;
+
+ case LOFS_BLOB:
+ assert(obj->type == OBJ_BLOB);
+ assert((obj->flags & SEEN) == 0);
+
+ if (filter_data->omits)
+ oidset_insert(filter_data->omits, &obj->oid);
+ return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
+ }
+}
+
+static void *filter_blobs_none__init(
+ struct oidset *omitted,
+ struct list_objects_filter_options *filter_options,
+ filter_object_fn *filter_fn,
+ filter_free_fn *filter_free_fn)
+{
+ struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
+ d->omits = omitted;
+
+ *filter_fn = filter_blobs_none;
+ *filter_free_fn = free;
+ return d;
+}
+
+/*
+ * A filter for list-objects to omit large blobs.
+ * And to OPTIONALLY collect a list of the omitted OIDs.
+ */
+struct filter_blobs_limit_data {
+ struct oidset *omits;
+ unsigned long max_bytes;
+};
+
+static enum list_objects_filter_result filter_blobs_limit(
+ enum list_objects_filter_situation filter_situation,
+ struct object *obj,
+ const char *pathname,
+ const char *filename,
+ void *filter_data_)
+{
+ struct filter_blobs_limit_data *filter_data = filter_data_;
+ unsigned long object_length;
+ enum object_type t;
+
+ switch (filter_situation) {
+ default:
+ die("unknown filter_situation");
+ return LOFR_ZERO;
+
+ case LOFS_BEGIN_TREE:
+ assert(obj->type == OBJ_TREE);
+ /* always include all tree objects */
+ return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+
+ case LOFS_END_TREE:
+ assert(obj->type == OBJ_TREE);
+ return LOFR_ZERO;
+
+ case LOFS_BLOB:
+ assert(obj->type == OBJ_BLOB);
+ assert((obj->flags & SEEN) == 0);
+
+ t = sha1_object_info(obj->oid.hash, &object_length);
+ if (t != OBJ_BLOB) { /* probably OBJ_NONE */
+ /*
+ * We DO NOT have the blob locally, so we cannot
+ * apply the size filter criteria. Be conservative
+ * and force show it (and let the caller deal with
+ * the ambiguity).
+ */
+ goto include_it;
+ }
+
+ if (object_length < filter_data->max_bytes)
+ goto include_it;
+
+ if (filter_data->omits)
+ oidset_insert(filter_data->omits, &obj->oid);
+ return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
+ }
+
+include_it:
+ if (filter_data->omits)
+ oidset_remove(filter_data->omits, &obj->oid);
+ return LOFR_MARK_SEEN | LOFR_DO_SHOW;
+}
+
+static void *filter_blobs_limit__init(
+ struct oidset *omitted,
+ struct list_objects_filter_options *filter_options,
+ filter_object_fn *filter_fn,
+ filter_free_fn *filter_free_fn)
+{
+ struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
+ d->omits = omitted;
+ d->max_bytes = filter_options->blob_limit_value;
+
+ *filter_fn = filter_blobs_limit;