summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- list-objects-filter-options.c 106
-rw-r--r-- list-objects-filter-options.h 17
-rw-r--r-- list-objects-filter.c 161
-rw-r--r-- list-objects-filter.h 13
-rwxr-xr-x t/t6112-rev-list-filters-objects.sh 151
-rw-r--r-- url.c 6
-rw-r--r-- url.h 8
7 files changed, 454 insertions, 8 deletions
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 7c3e397d29..75d0236ee2 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -6,6 +6,12 @@
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
+#include "url.h"
+
+/*
+ * Forward declaration: gently_parse_list_objects_filter() hands "combine:"
+ * specs to this function, which is defined further down in this file.
+ */
+static int parse_combine_filter(
+ struct list_objects_filter_options *filter_options,
+ const char *arg,
+ struct strbuf *errbuf);
/*
* Parse value of the argument to the "filter" keyword.
@@ -35,8 +41,6 @@ static int gently_parse_list_objects_filter(
return 1;
}
- filter_options->filter_spec = strdup(arg);
-
if (!strcmp(arg, "blob:none")) {
filter_options->choice = LOFC_BLOB_NONE;
return 0;
@@ -77,6 +81,10 @@ static int gently_parse_list_objects_filter(
_("sparse:path filters support has been dropped"));
}
return 1;
+
+ } else if (skip_prefix(arg, "combine:", &v0)) {
+ return parse_combine_filter(filter_options, v0, errbuf);
+
}
/*
* Please update _git_fetch() in git-completion.bash when you
@@ -89,10 +97,95 @@ static int gently_parse_list_objects_filter(
return 1;
}
+/*
+ * Printable characters that must be percent-encoded inside a "combine:"
+ * sub-filter-spec. Whitespace and control characters (anything at or
+ * below ' ') are rejected separately in has_reserved_character().
+ */
+static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
+
+/*
+ * Check a still-encoded sub-filter-spec for characters that must be
+ * percent-encoded.
+ *
+ * Returns 1 and appends a message naming the first offending character
+ * to `errbuf` if one is found; returns 0 if `sub_spec` is clean.
+ */
+static int has_reserved_character(
+	struct strbuf *sub_spec, struct strbuf *errbuf)
+{
+	const char *c;
+
+	for (c = sub_spec->buf; *c; c++) {
+		/*
+		 * Compare as unsigned char: where plain char is signed, bytes
+		 * >= 0x80 are negative and would otherwise be mistaken for
+		 * control characters, making the result platform-dependent.
+		 */
+		if ((unsigned char)*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
+			strbuf_addf(
+				errbuf,
+				_("must escape char in sub-filter-spec: '%c'"),
+				*c);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Append one slot to filter_options->sub and parse `subspec` (a single,
+ * still percent-encoded component of a "combine:" spec) into it.
+ *
+ * The raw text is checked for reserved characters; the percent-decoded
+ * text is what gets parsed. Returns 0 on success and 1 on failure, with
+ * the reason appended to `errbuf`. The new slot is counted in sub_nr
+ * even when parsing fails.
+ */
+static int parse_combine_subfilter(
+	struct list_objects_filter_options *filter_options,
+	struct strbuf *subspec,
+	struct strbuf *errbuf)
+{
+	struct list_objects_filter_options *slot;
+	size_t slot_index = filter_options->sub_nr;
+	char *plain;
+	int status;
+
+	filter_options->sub_nr = slot_index + 1;
+	ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
+		   filter_options->sub_alloc);
+
+	/* Take the address only after ALLOC_GROW, which may move the array. */
+	slot = &filter_options->sub[slot_index];
+	memset(slot, 0, sizeof(*slot));
+
+	plain = url_percent_decode(subspec->buf);
+
+	if (has_reserved_character(subspec, errbuf))
+		status = 1;
+	else
+		status = !!gently_parse_list_objects_filter(
+			slot, plain, errbuf);
+
+	free(plain);
+	return status;
+}
+
+/*
+ * Parse the text following "combine:" (a '+'-separated list of
+ * sub-filter-specs) into filter_options->sub[] and mark the filter as
+ * LOFC_COMBINE.
+ *
+ * Returns 0 on success. On failure returns 1, appends a message to
+ * `errbuf`, and releases and zeroes `filter_options`.
+ */
+static int parse_combine_filter(
+	struct list_objects_filter_options *filter_options,
+	const char *arg,
+	struct strbuf *errbuf)
+{
+	struct strbuf **pieces = strbuf_split_str(arg, '+', 0);
+	struct strbuf **cur;
+	int status = 0;
+
+	if (!pieces[0]) {
+		strbuf_addstr(errbuf, _("expected something after combine:"));
+		status = 1;
+		goto cleanup;
+	}
+
+	for (cur = pieces; *cur; cur++) {
+		struct strbuf *piece = *cur;
+
+		if (cur[1]) {
+			/*
+			 * Every piece but the last still carries its "+"
+			 * terminator; drop it before parsing.
+			 */
+			size_t tail = piece->len - 1;
+			assert(piece->buf[tail] == '+');
+			strbuf_remove(piece, tail, 1);
+		}
+
+		status = parse_combine_subfilter(filter_options, piece, errbuf);
+		if (status)
+			break;
+	}
+
+	filter_options->choice = LOFC_COMBINE;
+
+cleanup:
+	strbuf_list_free(pieces);
+	if (status) {
+		list_objects_filter_release(filter_options);
+		memset(filter_options, 0, sizeof(*filter_options));
+	}
+	return status;
+}
+
int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
const char *arg)
{
struct strbuf buf = STRBUF_INIT;
+ filter_options->filter_spec = strdup(arg);
if (gently_parse_list_objects_filter(filter_options, arg, &buf))
die("%s", buf.buf);
return 0;
@@ -129,8 +222,15 @@ void expand_list_objects_filter_spec(
void list_objects_filter_release(
struct list_objects_filter_options *filter_options)
{
+ size_t sub;
+
+ /* A NULL pointer is accepted and is a no-op. */
+ if (!filter_options)
+ return;
free(filter_options->filter_spec);
free(filter_options->sparse_oid_value);
+ /* Release what each sub-filter owns (recursively), then the array. */
+ for (sub = 0; sub < filter_options->sub_nr; sub++)
+ list_objects_filter_release(&filter_options->sub[sub]);
+ free(filter_options->sub);
memset(filter_options, 0, sizeof(*filter_options));
}
@@ -174,6 +274,8 @@ void partial_clone_get_default_filter_spec(
*/
if (!core_partial_clone_filter_default)
return;
+
+ filter_options->filter_spec = strdup(core_partial_clone_filter_default);
gently_parse_list_objects_filter(filter_options,
core_partial_clone_filter_default,
&errbuf);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index c54f0000fb..789faef1e5 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -13,6 +13,7 @@ enum list_objects_filter_choice {
LOFC_BLOB_LIMIT,
LOFC_TREE_DEPTH,
LOFC_SPARSE_OID,
+ LOFC_COMBINE,
LOFC__COUNT /* must be last */
};
@@ -38,13 +39,23 @@ struct list_objects_filter_options {
unsigned int no_filter : 1;
/*
- * Parsed values (fields) from within the filter-spec. These are
- * choice-specific; not all values will be defined for any given
- * choice.
+ * BEGIN choice-specific parsed values from within the filter-spec. Only
+ * some values will be defined for any given choice.
*/
+
struct object_id *sparse_oid_value;
unsigned long blob_limit_value;
unsigned long tree_exclude_depth;
+
+ /* LOFC_COMBINE values */
+
+ /* This array contains all the subfilters which this filter combines. */
+ size_t sub_nr, sub_alloc;
+ struct list_objects_filter_options *sub;
+
+ /*
+ * END choice-specific parsed values.
+ */
};
/* Normalized command line arguments */
diff --git a/list-objects-filter.c b/list-objects-filter.c
index 3b4b6764ca..d664264d65 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -26,6 +26,14 @@
*/
#define FILTER_SHOWN_BUT_REVISIT (1<<21)
+/*
+ * Per-sub-filter state kept by the combine filter (see
+ * combine_filter_data and process_subfilter()).
+ */
+struct subfilter {
+ /* The wrapped filter, created by list_objects_filter__init(). */
+ struct filter *filter;
+ /* Objects for which this sub-filter returned LOFR_MARK_SEEN. */
+ struct oidset seen;
+ /* This sub-filter's own omits; merged into the caller's set at finalize. */
+ struct oidset omits;
+ /* Tree whose contents are being skipped; set together with is_skipping_tree. */
+ struct object_id skip_tree;
+ /* Set on LOFR_SKIP_TREE, cleared at LOFS_END_TREE of skip_tree. */
+ unsigned is_skipping_tree : 1;
+};
+
struct filter {
enum list_objects_filter_result (*filter_object_fn)(
struct repository *r,
@@ -36,6 +44,23 @@ struct filter {
struct oidset *omits,
void *filter_data);
+ /*
+ * Optional. If this function is supplied and the filter needs
+ * to collect omits, then this function is called once before
+ * free_fn is called.
+ *
+ * This is required because the following two conditions hold:
+ *
+ * a. A tree filter can add objects to its omits set and later
+ * remove them again as an object graph is traversed.
+ * b. A combine filter's omit set is the union of all its
+ * subfilters, which may include tree: filters.
+ *
+ * As such, the omits sets must be separate sets, and can only
+ * be unioned after the traversal is completed.
+ */
+ void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);
+
void (*free_fn)(void *filter_data);
void *filter_data;
@@ -471,6 +496,139 @@ static void filter_sparse_oid__init(
filter->free_fn = filter_sparse_free;
}
+/* A filter which only shows objects shown by all sub-filters. */
+struct combine_filter_data {
+ /* Array of `nr` sub-filter states, one per sub-filter option. */
+ struct subfilter *sub;
+ /* Number of entries in `sub`; copied from filter_options->sub_nr. */
+ size_t nr;
+};
+
+/*
+ * Run one sub-filter of a combine filter on `obj` and return its result.
+ *
+ * Returns LOFR_ZERO without consulting the sub-filter while this
+ * sub-filter is skipping a tree (until the LOFS_END_TREE for that tree
+ * arrives), or when this sub-filter has already marked the object as
+ * seen. Otherwise forwards to the sub-filter and records its
+ * LOFR_MARK_SEEN / LOFR_SKIP_TREE results in `sub`.
+ */
+static enum list_objects_filter_result process_subfilter(
+ struct repository *r,
+ enum list_objects_filter_situation filter_situation,
+ struct object *obj,
+ const char *pathname,
+ const char *filename,
+ struct subfilter *sub)
+{
+ enum list_objects_filter_result result;
+
+ /*
+ * Check and update is_skipping_tree before oidset_contains so
+ * that is_skipping_tree gets unset even when the object is
+ * marked as seen. As of this writing, no filter uses
+ * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
+ * ordering is only theoretically important. Be cautious if you
+ * change the order of the below checks and more filters have
+ * been added!
+ */
+ if (sub->is_skipping_tree) {
+ if (filter_situation == LOFS_END_TREE &&
+ oideq(&obj->oid, &sub->skip_tree))
+ sub->is_skipping_tree = 0;
+ else
+ return LOFR_ZERO;
+ }
+ if (oidset_contains(&sub->seen, &obj->oid))
+ return LOFR_ZERO;
+
+ result = list_objects_filter__filter_object(
+ r, filter_situation, obj, pathname, filename, sub->filter);
+
+ if (result & LOFR_MARK_SEEN)
+ oidset_insert(&sub->seen, &obj->oid);
+
+ /* Remember which tree started the skip so its END_TREE can end it. */
+ if (result & LOFR_SKIP_TREE) {
+ sub->is_skipping_tree = 1;
+ sub->skip_tree = obj->oid;
+ }
+
+ return result;
+}
+
+/*
+ * filter_object_fn for LOFC_COMBINE: consult every sub-filter and
+ * intersect their verdicts.
+ *
+ * LOFR_DO_SHOW and LOFR_MARK_SEEN survive only if every sub-filter
+ * returned them for this call; LOFR_SKIP_TREE survives only if every
+ * sub-filter is in its skipping-tree state after the call. The `omits`
+ * argument is not used here: each sub-filter was given its own set.
+ */
+static enum list_objects_filter_result filter_combine(
+	struct repository *r,
+	enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname,
+	const char *filename,
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	enum list_objects_filter_result verdict = LOFR_ZERO;
+	int all_show = 1, all_seen = 1, all_skipping = 1;
+	size_t i;
+
+	for (i = 0; i < d->nr; i++) {
+		struct subfilter *cur = &d->sub[i];
+		enum list_objects_filter_result one = process_subfilter(
+			r, filter_situation, obj, pathname, filename, cur);
+
+		if (!(one & LOFR_DO_SHOW))
+			all_show = 0;
+		if (!(one & LOFR_MARK_SEEN))
+			all_seen = 0;
+		if (!cur->is_skipping_tree)
+			all_skipping = 0;
+	}
+
+	if (all_show)
+		verdict |= LOFR_DO_SHOW;
+	if (all_seen)
+		verdict |= LOFR_MARK_SEEN;
+	if (all_skipping)
+		verdict |= LOFR_SKIP_TREE;
+
+	return verdict;
+}
+
+/*
+ * free_fn for LOFC_COMBINE: destroy every sub-filter, its `seen` set,
+ * the sub-filter array, and the combine_filter_data itself.
+ *
+ * Each sub-filter's `omits` set must already be empty by the time this
+ * runs; a non-empty set is reported as a BUG.
+ */
+static void filter_combine__free(void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t sub;
+
+	for (sub = 0; sub < d->nr; sub++) {
+		list_objects_filter__free(d->sub[sub].filter);
+		oidset_clear(&d->sub[sub].seen);
+		if (d->sub[sub].omits.set.size)
+			BUG("expected oidset to be cleared already");
+	}
+	free(d->sub);
+	/*
+	 * `d` was xcalloc'ed in filter_combine__init() and nothing else
+	 * frees it: list_objects_filter__free() only frees the struct filter.
+	 */
+	free(d);
+}
+
+/* Insert every object id found by iterating `src` into `dest`. */
+static void add_all(struct oidset *dest, struct oidset *src)
+{
+	struct oidset_iter it;
+	struct object_id *oid;
+
+	oidset_iter_init(src, &it);
+	for (oid = oidset_iter_next(&it); oid; oid = oidset_iter_next(&it))
+		oidset_insert(dest, oid);
+}
+
+/*
+ * finalize_omits_fn for LOFC_COMBINE: fold each sub-filter's private
+ * omits set into `omits`, emptying the private set as it goes.
+ */
+static void filter_combine__finalize_omits(
+	struct oidset *omits,
+	void *filter_data)
+{
+	struct combine_filter_data *d = filter_data;
+	size_t i = 0;
+
+	while (i < d->nr) {
+		struct oidset *own = &d->sub[i++].omits;
+
+		add_all(omits, own);
+		oidset_clear(own);
+	}
+}
+
+/*
+ * Initializer for LOFC_COMBINE: build one sub-filter per entry in
+ * filter_options->sub and install the combine callbacks on `filter`.
+ *
+ * When the combined filter collects omits, each sub-filter is given its
+ * own omits set; otherwise sub-filters are created without one.
+ */
+static void filter_combine__init(
+	struct list_objects_filter_options *filter_options,
+	struct filter *filter)
+{
+	struct combine_filter_data *data = xcalloc(1, sizeof(*data));
+	int collect_omits = filter->omits != NULL;
+	size_t i;
+
+	data->nr = filter_options->sub_nr;
+	data->sub = xcalloc(data->nr, sizeof(*data->sub));
+
+	for (i = 0; i < data->nr; i++) {
+		struct subfilter *cur = &data->sub[i];
+		struct oidset *own_omits = collect_omits ? &cur->omits : NULL;
+
+		cur->filter = list_objects_filter__init(
+			own_omits, &filter_options->sub[i]);
+	}
+
+	filter->filter_object_fn = filter_combine;
+	filter->finalize_omits_fn = filter_combine__finalize_omits;
+	filter->free_fn = filter_combine__free;
+	filter->filter_data = data;
+}
+
typedef void (*filter_init_fn)(
struct list_objects_filter_options *filter_options,
struct filter *filter);
@@ -484,6 +642,7 @@ static filter_init_fn s_filters[] = {
filter_blobs_limit__init,
filter_trees_depth__init,
filter_sparse_oid__init,
+ filter_combine__init,
};
struct filter *list_objects_filter__init(
@@ -536,6 +695,8 @@ void list_objects_filter__free(struct filter *filter)
{
if (!filter)
return;
+ /*
+ * Finalize before free_fn: the finalize callback is handed filter_data,
+ * which free_fn is about to release.
+ */
+ if (filter->finalize_omits_fn && filter->omits)
+ filter->finalize_omits_fn(filter->omits, filter->filter_data);
filter->free_fn(filter->filter_data);
free(filter);
}
diff --git a/list-objects-filter.h b/list-objects-filter.h
index 6908954266..cfd784e203 100644
--- a/list-objects-filter.h
+++ b/list-objects-filter.h
@@ -62,7 +62,13 @@ enum list_objects_filter_situation {
struct filter;
-/* Constructor for the set of defined list-objects filters. */
+/*
+ * Constructor for the set of defined list-objects filters.
+ * The `omitted` set is optional. It is populated with objects that the
+ * filter excludes. This set should not be considered finalized until
+ * after list_objects_filter__free is called on the returned `struct
+ * filter *`.
+ */
struct filter *list_objects_filter__init(
struct oidset *omitted,
struct list_objects_filter_options *filter_options);
@@ -80,7 +86,10 @@ enum list_objects_filter_result list_objects_filter__filter_object(
const char *filename,
struct filter *filter);
-/* Destroys `filter`. Does nothing if `filter` is null. */
+/*
+ * Destroys `filter` and finalizes the `omitted` set, if present. Does
+ * nothing if `filter` is null.
+ */
void list_objects_filter__free(struct filter *filter);
#endif /* LIST_OBJECTS_FILTER_H */
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index acd7f5ab80..05d4f2e9c2 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -278,7 +278,19 @@ test_expect_success 'verify skipping tree iteration when not collecting omits' '
test_line_count = 2 actual &&
# Make sure no other trees were considered besides the root.
- ! grep "Skipping contents of tree [^.]" filter_trace
+ ! grep "Skipping contents of tree [^.]" filter_trace &&
+
+ # Try this again with "combine:". If both sub-filters are skipping
+ # trees, the composite filter should also skip trees. This is not
+ # important unless the user does combine:tree:X+tree:Y or another filter
+ # besides "tree:" is implemented in the future which can skip trees.
+ GIT_TRACE=1 git -C r3 rev-list \
+ --objects --filter=combine:tree:1+tree:3 HEAD 2>filter_trace &&
+
+ # Only skip the dir1/ tree, which is shared between the two commits.
+ grep "Skipping contents of tree " filter_trace >actual &&
+ test_write_lines "Skipping contents of tree dir1/..." >expected &&
+ test_cmp expected actual
'
# Test tree:# filters.
@@ -330,6 +342,112 @@ test_expect_success 'verify tree:3 includes everything expected' '
test_line_count = 10 actual
'
+# An object is listed only if every sub-filter would list it, so the result
+# is the intersection of what tree:2 and blob:none each allow.
+test_expect_success 'combine:... for a simple combination' '
+ git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \
+ >actual &&
+
+ expect_has HEAD "" &&
+ expect_has HEAD~1 "" &&
+ expect_has HEAD dir1 &&
+
+ # There are also 2 commit objects
+ test_line_count = 5 actual
+'
+
+# Same spec as the simple combination test, with some characters
+# percent-encoded: %3a is ":", %6E is "n" and %65 is "e". Sub-filter-specs
+# are percent-decoded before being parsed, so the listing must be the same.
+test_expect_success 'combine:... with URL encoding' '
+ git -C r3 rev-list --objects \
+ --filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual &&
+
+ expect_has HEAD "" &&
+ expect_has HEAD~1 "" &&
+ expect_has HEAD dir1 &&
+
+ # There are also 2 commit objects
+ test_line_count = 5 actual
+'
+
+# Usage: expect_invalid_filter_spec <filter-spec> <error-pattern>
+#
+# Assert that rev-list in r3 rejects --filter=<filter-spec>: the command
+# must fail, print nothing on stdout, and write a message to stderr that
+# test_i18ngrep finds <error-pattern> in.
+expect_invalid_filter_spec () {
+ spec="$1" &&
+ err="$2" &&
+
+ test_must_fail git -C r3 rev-list --objects --filter="$spec" HEAD \
+ >actual 2>actual_stderr &&
+ test_must_be_empty actual &&
+ test_i18ngrep "$err" actual_stderr
+}
+
+# The "combine:" prefix is matched against the raw argument, before any
+# percent-decoding, so encoding its ":" leaves the spec unrecognized.
+test_expect_success 'combine:... while URL-encoding things that should not be' '
+ expect_invalid_filter_spec combine%3Atree:2+blob:none \
+ "invalid filter-spec"
+'
+
+# An empty list of sub-filters is rejected with its own message.
+test_expect_success 'combine: with nothing after the :' '
+ expect_invalid_filter_spec combine: "expected something after combine:"
+'
+
+# A sub-filter's own parse error is reported as the error for the whole
+# combine: spec.
+test_expect_success 'parse error in first sub-filter in combine:' '
+ expect_invalid_filter_spec combine:tree:asdf+blob:none \
+ "expected .tree:<depth>."
+'
+
+# The at-sign, backtick and tilde are reserved inside a sub-filter-spec:
+# each must be rejected, and named in the error, unless percent-encoded.
+test_expect_success 'combine:... with non-encoded reserved chars' '
+ expect_invalid_filter_spec combine:tree:2+sparse:@xyz \
+ "must escape char in sub-filter-spec: .@." &&
+ expect_invalid_filter_spec combine:tree:2+sparse:\` \
+ "must escape char in sub-filter-spec: .\`." &&
+ expect_invalid_filter_spec combine:tree:2+sparse:~abc \
+ "must escape char in sub-filter-spec: .\~."
+'
+
+# NOTE(review): presumably the trailing "+" stays attached to the only
+# sub-filter-spec ("tree:2+"), so it is the tree: parser that rejects
+# it -- confirm against strbuf_split_str().
+test_expect_success 'validate err msg for "combine:<valid-filter>+"' '
+ expect_invalid_filter_spec combine:tree:2+ "expected .tree:<depth>."
+'
+
+# Hex digits in %XX escapes are tried in both cases: %6F/%6f ("o") and
+# %3A/%3a (":"). %30 and %39 ("0" and "9") use the lowest and highest
+# decimal digits in the low nibble.
+test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' '
+ git -C r3 rev-list --objects --filter="combine:tree:2+bl%6Fb:n%6fne" \
+ HEAD >actual &&
+ test_line_count = 5 actual &&
+ git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \
+ HEAD >actual &&
+ test_line_count = 5 actual &&
+ git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual &&
+ test_line_count = 2 actual &&
+ git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \
+ HEAD >actual &&
+ test_line_count = 5 actual
+'
+
+# Commit a copy of the sparse pattern under a name containing "+" (the
+# sub-filter separator) and "%" (the escape introducer); a later test
+# names this file in percent-encoded form.
+test_expect_success 'add a sparse pattern blob whose path has reserved chars' '
+ cp r3/pattern r3/pattern1+renamed% &&
+ git -C r3 add pattern1+renamed% &&
+ git -C r3 commit -m "add sparse pattern file with reserved chars"
+'
+
+# Three sub-filters at once. The second invocation names the copied
+# pattern blob through a percent-encoded path (%2b is "+", %25 is "%")
+# and must produce the same listing as the first.
+test_expect_success 'combine:... with more than two sub-filters' '
+ git -C r3 rev-list --objects \
+ --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \
+ HEAD >actual &&
+
+ expect_has HEAD "" &&
+ expect_has HEAD~1 "" &&
+ expect_has HEAD~2 "" &&
+ expect_has HEAD dir1 &&
+ expect_has HEAD dir1/sparse1 &&
+ expect_has HEAD dir1/sparse2 &&
+
+ # Should also have 3 commits
+ test_line_count = 9 actual &&
+
+ # Try again, this time making sure the last sub-filter is only
+ # URL-decoded once.
+ cp actual expect &&
+
+ git -C r3 rev-list --objects \
+ --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \
+ HEAD >actual &&
+ test_cmp expect actual
+'
+
# Test provisional omit collection logic with a repo that has objects appearing
# at multiple depths - first deeper than the filter's threshold, then shallow.
@@ -373,6 +491,37 @@ test_expect_success 'verify skipping tree iteration when collecting omits' '
test_cmp expect actual
'
+# r5 holds one short and one long blob at the root, and one short and one
+# long blob inside subdir/, all in a single commit.
+test_expect_success 'setup r5' '
+ git init r5 &&
+ mkdir -p r5/subdir &&
+
+ echo 1 >r5/short-root &&
+ echo 12345 >r5/long-root &&
+ echo a >r5/subdir/short-subdir &&
+ echo abcde >r5/subdir/long-subdir &&
+
+ git -C r5 add short-root long-root subdir &&
+ git -C r5 commit -m "commit msg"
+'
+
+# The omits printed for a combine: filter must be the union of what the
+# individual sub-filters omit.
+test_expect_success 'verify collecting omits in combined: filter' '
+ # Note that this test guards against the naive implementation of simply
+ # giving both filters the same "omits" set and expecting it to
+ # automatically merge them.
+ git -C r5 rev-list --objects --quiet --filter-print-omitted \
+ --filter=combine:tree:2+blob:limit=3 HEAD >actual &&
+
+ # Expect 0 trees/commits, 3 blobs omitted (all blobs except short-root)
+ omitted_1=$(echo 12345 | git hash-object --stdin) &&
+ omitted_2=$(echo a | git hash-object --stdin) &&
+ omitted_3=$(echo abcde | git hash-object --stdin) &&
+
+ grep ~$omitted_1 actual &&
+ grep ~$omitted_2 actual &&
+ grep ~$omitted_3 actual &&
+ test_line_count = 3 actual
+'
+
# Test tree:<depth> where a tree is iterated to twice - once where a subentry is
# too deep to be included, and again where the blob inside it is shallow enough
# to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
diff --git a/url.c b/url.c
index 1b8ef78cea..e34e5e7517 100644
--- a/url.c
+++ b/url.c
@@ -86,6 +86,12 @@ char *url_decode_mem(const char *url, int len)
return url_decode_internal(&url, len, NULL, &out, 0);
}
+/*
+ * Percent-decode the whole NUL-terminated string `encoded` by handing it
+ * to url_decode_internal() with no stop characters and with its final
+ * argument set to 0. Returns that function's result;
+ * parse_combine_subfilter() releases it with free().
+ */
+char *url_percent_decode(const char *encoded)
+{
+	struct strbuf decoded = STRBUF_INIT;
+	const char *cursor = encoded;
+
+	return url_decode_internal(&cursor, strlen(encoded), NULL,
+				   &decoded, 0);
+}
+
char *url_decode_parameter_name(const char **query)
{
struct strbuf out = STRBUF_INIT;
diff --git a/url.h b/url.h
index 00b7d58c33..2a27c34277 100644
--- a/url.h
+++ b/url.h
@@ -7,6 +7,14 @@ int is_url(const char *url);
int is_urlschemechar(int first_flag, int ch);
char *url_decode(const char *url);
char *url_decode_mem(const char *url, int len);
+
+/*
+ * Similar to the url_decode() and url_decode_mem() functions above, but
+ * doesn't assume there is a scheme followed by a : at the start of the
+ * string. Instead, %-sequences before any : are also decoded.
+ *
+ * `encoded` must be NUL-terminated. The caller in
+ * list-objects-filter-options.c releases the returned string with free().
+ */
+char *url_percent_decode(const char *encoded);
+
char *url_decode_parameter_name(const char **query);
char *url_decode_parameter_value(const char **query);