diff options
author | Junio C Hamano <gitster@pobox.com> | 2019-09-18 11:50:09 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2019-09-18 11:50:09 -0700 |
commit | 627b82683447e299fc2e20140318c276efbf7de2 (patch) | |
tree | fd0da2b151024ebe5a71db00e75ec8a41eff9fd6 | |
parent | Merge branch 'cc/multi-promisor' (diff) | |
parent | list-objects-filter-options: make parser void (diff) | |
download | tgif-627b82683447e299fc2e20140318c276efbf7de2.tar.xz |
Merge branch 'md/list-objects-filter-combo'
The list-objects-filter API (used to create a sparse/lazy clone)
learned to take a combined filter specification.
* md/list-objects-filter-combo:
list-objects-filter-options: make parser void
list-objects-filter-options: clean up use of ALLOC_GROW
list-objects-filter-options: allow mult. --filter
strbuf: give URL-encoding API a char predicate fn
list-objects-filter-options: make filter_spec a string_list
list-objects-filter-options: move error check up
list-objects-filter: implement composite filters
list-objects-filter-options: always supply *errbuf
list-objects-filter: put omits set in filter struct
list-objects-filter: encapsulate filter components
-rw-r--r-- | Documentation/rev-list-options.txt | 16 | ||||
-rw-r--r-- | builtin/clone.c | 8 | ||||
-rw-r--r-- | builtin/fetch.c | 9 | ||||
-rw-r--r-- | builtin/rev-list.c | 6 | ||||
-rw-r--r-- | cache.h | 22 | ||||
-rw-r--r-- | credential-store.c | 9 | ||||
-rw-r--r-- | fetch-pack.c | 20 | ||||
-rw-r--r-- | http.c | 6 | ||||
-rw-r--r-- | list-objects-filter-options.c | 275 | ||||
-rw-r--r-- | list-objects-filter-options.h | 57 | ||||
-rw-r--r-- | list-objects-filter.c | 335 | ||||
-rw-r--r-- | list-objects-filter.h | 40 | ||||
-rw-r--r-- | list-objects.c | 55 | ||||
-rw-r--r-- | strbuf.c | 15 | ||||
-rw-r--r-- | strbuf.h | 7 | ||||
-rwxr-xr-x | t/t5616-partial-clone.sh | 19 | ||||
-rwxr-xr-x | t/t6112-rev-list-filters-objects.sh | 194 | ||||
-rw-r--r-- | transport-helper.c | 10 | ||||
-rw-r--r-- | transport.c | 1 | ||||
-rw-r--r-- | upload-pack.c | 13 | ||||
-rw-r--r-- | url.c | 6 | ||||
-rw-r--r-- | url.h | 8 |
22 files changed, 889 insertions, 242 deletions
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index bb1251c036..90ff9e2bea 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -756,6 +756,22 @@ explicitly-given commit or tree. Note that the form '--filter=sparse:path=<path>' that wants to read from an arbitrary path on the filesystem has been dropped for security reasons. ++ +Multiple '--filter=' flags can be specified to combine filters. Only +objects which are accepted by every filter are included. ++ +The form '--filter=combine:<filter1>+<filter2>+...<filterN>' can also be +used to combined several filters, but this is harder than just repeating +the '--filter' flag and is usually not necessary. Filters are joined by +'{plus}' and individual filters are %-encoded (i.e. URL-encoded). +Besides the '{plus}' and '%' characters, the following characters are +reserved and also must be encoded: `~!@#$^&*()[]{}\;",<>?`+'`+ +as well as all characters with ASCII code <= `0x20`, which includes +space and newline. ++ +Other arbitrary characters can also be encoded. For instance, +'combine:tree:3+blob:none' and 'combine:tree%3A3+blob%3Anone' are +equivalent. --no-filter:: Turn off any previous `--filter=` argument. diff --git a/builtin/clone.c b/builtin/clone.c index f665b28ccc..2048b6760a 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1160,13 +1160,11 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport->server_options = &server_options; if (filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&filter_options, - &expanded_filter_spec); + const char *spec = + expand_list_objects_filter_spec(&filter_options); transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, - expanded_filter_spec.buf); + spec); transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); - strbuf_release(&expanded_filter_spec); } if (transport->smart_options && !deepen && !filter_options.choice) diff --git a/builtin/fetch.c b/builtin/fetch.c index 538f0e7207..9b27ae9681 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1243,13 +1243,10 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); if (filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&filter_options, - &expanded_filter_spec); - set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, - expanded_filter_spec.buf); + const char *spec = + expand_list_objects_filter_spec(&filter_options); + set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec); set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); - strbuf_release(&expanded_filter_spec); } if (negotiation_tip.nr) { if (transport->smart_options) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 301ccb970b..b8dc2e1fba 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -473,8 +473,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) die(_("object filtering requires --objects")); if (filter_options.choice == LOFC_SPARSE_OID && !filter_options.sparse_oid_value) - die(_("invalid sparse value '%s'"), - filter_options.filter_spec); + die( + _("invalid sparse value '%s'"), + list_objects_filter_spec( + &filter_options)); continue; } if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { @@ -636,6 +636,9 @@ int daemonize(void); * at least 'nr' entries; the number of entries currently allocated * is 'alloc', using the standard growing factor alloc_nr() macro. * + * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some + * added niceties. + * * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'. */ #define ALLOC_GROW(x, nr, alloc) \ @@ -649,6 +652,25 @@ int daemonize(void); } \ } while (0) +/* + * Similar to ALLOC_GROW but handles updating of the nr value and + * zeroing the bytes of the newly-grown array elements. + * + * DO NOT USE any expression with side-effect for any of the + * arguments. + */ +#define ALLOC_GROW_BY(x, nr, increase, alloc) \ + do { \ + if (increase) { \ + size_t new_nr = nr + (increase); \ + if (new_nr < nr) \ + BUG("negative growth in ALLOC_GROW_BY"); \ + ALLOC_GROW(x, new_nr, alloc); \ + memset((x) + nr, 0, sizeof(*(x)) * (increase)); \ + nr = new_nr; \ + } \ + } while (0) + /* Initialize and use the cache information */ struct lock_file; void preload_index(struct index_state *index, diff --git a/credential-store.c b/credential-store.c index ac295420dd..c010497cb2 100644 --- a/credential-store.c +++ b/credential-store.c @@ -72,15 +72,16 @@ static void store_credential_file(const char *fn, struct credential *c) struct strbuf buf = STRBUF_INIT; strbuf_addf(&buf, "%s://", c->protocol); - strbuf_addstr_urlencode(&buf, c->username, 1); + strbuf_addstr_urlencode(&buf, c->username, is_rfc3986_unreserved); strbuf_addch(&buf, ':'); - strbuf_addstr_urlencode(&buf, c->password, 1); + strbuf_addstr_urlencode(&buf, c->password, is_rfc3986_unreserved); strbuf_addch(&buf, '@'); if (c->host) - strbuf_addstr_urlencode(&buf, c->host, 1); + strbuf_addstr_urlencode(&buf, c->host, is_rfc3986_unreserved); if (c->path) { strbuf_addch(&buf, '/'); - strbuf_addstr_urlencode(&buf, c->path, 0); + strbuf_addstr_urlencode(&buf, c->path, + is_rfc3986_reserved_or_unreserved); } rewrite_credential_file(fn, c, &buf); diff --git a/fetch-pack.c b/fetch-pack.c index d81f47c07b..6ccc6294ea 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -338,12 +338,9 @@ static int find_common(struct fetch_negotiator *negotiator, } } if (server_supports_filtering && args->filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&args->filter_options, - &expanded_filter_spec); - packet_buf_write(&req_buf, "filter %s", - expanded_filter_spec.buf); - strbuf_release(&expanded_filter_spec); + const char *spec = + expand_list_objects_filter_spec(&args->filter_options); + packet_buf_write(&req_buf, "filter %s", spec); } packet_buf_flush(&req_buf); state_len = req_buf.len; @@ -1112,7 +1109,7 @@ static int add_haves(struct fetch_negotiator *negotiator, } static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, - const struct fetch_pack_args *args, + struct fetch_pack_args *args, const struct ref *wants, struct oidset *common, int *haves_to_send, int *in_vain, int sideband_all) @@ -1153,13 +1150,10 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, /* Add filter */ if (server_supports_feature("fetch", "filter", 0) && args->filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; + const char *spec = + expand_list_objects_filter_spec(&args->filter_options); print_verbose(args, _("Server supports filter")); - expand_list_objects_filter_spec(&args->filter_options, - &expanded_filter_spec); - packet_buf_write(&req_buf, "filter %s", - expanded_filter_spec.buf); - strbuf_release(&expanded_filter_spec); + packet_buf_write(&req_buf, "filter %s", spec); } else if (args->filter_options.choice) { warning("filtering not recognized by server, ignoring"); } @@ -513,9 +513,11 @@ static void set_proxyauth_name_password(CURL *result) #else struct strbuf s = STRBUF_INIT; - strbuf_addstr_urlencode(&s, proxy_auth.username, 1); + strbuf_addstr_urlencode(&s, proxy_auth.username, + is_rfc3986_unreserved); strbuf_addch(&s, ':'); - strbuf_addstr_urlencode(&s, proxy_auth.password, 1); + strbuf_addstr_urlencode(&s, proxy_auth.password, + is_rfc3986_unreserved); curl_proxyuserpwd = strbuf_detach(&s, NULL); curl_easy_setopt(result, CURLOPT_PROXYUSERPWD, curl_proxyuserpwd); #endif diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 28c571f922..4d88bfe64a 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -7,6 +7,13 @@ #include "list-objects-filter.h" #include "list-objects-filter-options.h" #include "promisor-remote.h" +#include "trace.h" +#include "url.h" + +static int parse_combine_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf); /* * Parse value of the argument to the "filter" keyword. @@ -33,16 +40,8 @@ static int gently_parse_list_objects_filter( if (!arg) return 0; - if (filter_options->choice) { - if (errbuf) { - strbuf_addstr( - errbuf, - _("multiple filter-specs cannot be combined")); - } - return 1; - } - - filter_options->filter_spec = strdup(arg); + if (filter_options->choice) + BUG("filter_options already populated"); if (!strcmp(arg, "blob:none")) { filter_options->choice = LOFC_BLOB_NONE; @@ -56,11 +55,7 @@ static int gently_parse_list_objects_filter( } else if (skip_prefix(arg, "tree:", &v0)) { if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) { - if (errbuf) { - strbuf_addstr( - errbuf, - _("expected 'tree:<depth>'")); - } + strbuf_addstr(errbuf, _("expected 'tree:<depth>'")); return 1; } filter_options->choice = LOFC_TREE_DEPTH; @@ -88,67 +83,253 @@ static int gently_parse_list_objects_filter( _("sparse:path filters support has been dropped")); } return 1; + + } else if (skip_prefix(arg, "combine:", &v0)) { + return parse_combine_filter(filter_options, v0, errbuf); + } /* * Please update _git_fetch() in git-completion.bash when you * add new filters */ - if (errbuf) - strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg); + strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg); memset(filter_options, 0, sizeof(*filter_options)); return 1; } -int parse_list_objects_filter(struct list_objects_filter_options *filter_options, - const char *arg) +static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?"; + +static int has_reserved_character( + struct strbuf *sub_spec, struct strbuf *errbuf) { - struct strbuf buf = STRBUF_INIT; - if (gently_parse_list_objects_filter(filter_options, arg, &buf)) - die("%s", buf.buf); + const char *c = sub_spec->buf; + while (*c) { + if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) { + strbuf_addf( + errbuf, + _("must escape char in sub-filter-spec: '%c'"), + *c); + return 1; + } + c++; + } + return 0; } +static int parse_combine_subfilter( + struct list_objects_filter_options *filter_options, + struct strbuf *subspec, + struct strbuf *errbuf) +{ + size_t new_index = filter_options->sub_nr; + char *decoded; + int result; + + ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1, + filter_options->sub_alloc); + + decoded = url_percent_decode(subspec->buf); + + result = has_reserved_character(subspec, errbuf) || + gently_parse_list_objects_filter( + &filter_options->sub[new_index], decoded, errbuf); + + free(decoded); + return result; +} + +static int parse_combine_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf) +{ + struct strbuf **subspecs = strbuf_split_str(arg, '+', 0); + size_t sub; + int result = 0; + + if (!subspecs[0]) { + strbuf_addstr(errbuf, _("expected something after combine:")); + result = 1; + goto cleanup; + } + + for (sub = 0; subspecs[sub] && !result; sub++) { + if (subspecs[sub + 1]) { + /* + * This is not the last subspec. Remove trailing "+" so + * we can parse it. + */ + size_t last = subspecs[sub]->len - 1; + assert(subspecs[sub]->buf[last] == '+'); + strbuf_remove(subspecs[sub], last, 1); + } + result = parse_combine_subfilter( + filter_options, subspecs[sub], errbuf); + } + + filter_options->choice = LOFC_COMBINE; + +cleanup: + strbuf_list_free(subspecs); + if (result) { + list_objects_filter_release(filter_options); + memset(filter_options, 0, sizeof(*filter_options)); + } + return result; +} + +static int allow_unencoded(char ch) +{ + if (ch <= ' ' || ch == '%' || ch == '+') + return 0; + return !strchr(RESERVED_NON_WS, ch); +} + +static void filter_spec_append_urlencode( + struct list_objects_filter_options *filter, const char *raw) +{ + struct strbuf buf = STRBUF_INIT; + strbuf_addstr_urlencode(&buf, raw, allow_unencoded); + trace_printf("Add to combine filter-spec: %s\n", buf.buf); + string_list_append(&filter->filter_spec, strbuf_detach(&buf, NULL)); +} + +/* + * Changes filter_options into an equivalent LOFC_COMBINE filter options + * instance. Does not do anything if filter_options is already LOFC_COMBINE. + */ +static void transform_to_combine_type( + struct list_objects_filter_options *filter_options) +{ + assert(filter_options->choice); + if (filter_options->choice == LOFC_COMBINE) + return; + { + const int initial_sub_alloc = 2; + struct list_objects_filter_options *sub_array = + xcalloc(initial_sub_alloc, sizeof(*sub_array)); + sub_array[0] = *filter_options; + memset(filter_options, 0, sizeof(*filter_options)); + filter_options->sub = sub_array; + filter_options->sub_alloc = initial_sub_alloc; + } + filter_options->sub_nr = 1; + filter_options->choice = LOFC_COMBINE; + string_list_append(&filter_options->filter_spec, xstrdup("combine:")); + filter_spec_append_urlencode( + filter_options, + list_objects_filter_spec(&filter_options->sub[0])); + /* + * We don't need the filter_spec strings for subfilter specs, only the + * top level. + */ + string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0); +} + +void list_objects_filter_die_if_populated( + struct list_objects_filter_options *filter_options) +{ + if (filter_options->choice) + die(_("multiple filter-specs cannot be combined")); +} + +void parse_list_objects_filter( + struct list_objects_filter_options *filter_options, + const char *arg) +{ + struct strbuf errbuf = STRBUF_INIT; + int parse_error; + + if (!filter_options->choice) { + string_list_append(&filter_options->filter_spec, xstrdup(arg)); + + parse_error = gently_parse_list_objects_filter( + filter_options, arg, &errbuf); + } else { + /* + * Make filter_options an LOFC_COMBINE spec so we can trivially + * add subspecs to it. + */ + transform_to_combine_type(filter_options); + + string_list_append(&filter_options->filter_spec, xstrdup("+")); + filter_spec_append_urlencode(filter_options, arg); + ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1, + filter_options->sub_alloc); + + parse_error = gently_parse_list_objects_filter( + &filter_options->sub[filter_options->sub_nr - 1], arg, + &errbuf); + } + if (parse_error) + die("%s", errbuf.buf); +} + int opt_parse_list_objects_filter(const struct option *opt, const char *arg, int unset) { struct list_objects_filter_options *filter_options = opt->value; - if (unset || !arg) { + if (unset || !arg) list_objects_filter_set_no_filter(filter_options); - return 0; + else + parse_list_objects_filter(filter_options, arg); + return 0; +} + +const char *list_objects_filter_spec(struct list_objects_filter_options *filter) +{ + if (!filter->filter_spec.nr) + BUG("no filter_spec available for this filter"); + if (filter->filter_spec.nr != 1) { + struct strbuf concatted = STRBUF_INIT; + strbuf_add_separated_string_list( + &concatted, "", &filter->filter_spec); + string_list_clear(&filter->filter_spec, /*free_util=*/0); + string_list_append( + &filter->filter_spec, strbuf_detach(&concatted, NULL)); } - return parse_list_objects_filter(filter_options, arg); + return filter->filter_spec.items[0].string; } -void expand_list_objects_filter_spec( - const struct list_objects_filter_options *filter, - struct strbuf *expanded_spec) +const char *expand_list_objects_filter_spec( + struct list_objects_filter_options *filter) { - strbuf_init(expanded_spec, strlen(filter->filter_spec)); - if (filter->choice == LOFC_BLOB_LIMIT) - strbuf_addf(expanded_spec, "blob:limit=%lu", + if (filter->choice == LOFC_BLOB_LIMIT) { + struct strbuf expanded_spec = STRBUF_INIT; + strbuf_addf(&expanded_spec, "blob:limit=%lu", filter->blob_limit_value); - else if (filter->choice == LOFC_TREE_DEPTH) - strbuf_addf(expanded_spec, "tree:%lu", - filter->tree_exclude_depth); - else - strbuf_addstr(expanded_spec, filter->filter_spec); + string_list_clear(&filter->filter_spec, /*free_util=*/0); + string_list_append( + &filter->filter_spec, + strbuf_detach(&expanded_spec, NULL)); + } + + return list_objects_filter_spec(filter); } void list_objects_filter_release( struct list_objects_filter_options *filter_options) { - free(filter_options->filter_spec); + size_t sub; + + if (!filter_options) + return; + string_list_clear(&filter_options->filter_spec, /*free_util=*/0); free(filter_options->sparse_oid_value); + for (sub = 0; sub < filter_options->sub_nr; sub++) + list_objects_filter_release(&filter_options->sub[sub]); + free(filter_options->sub); memset(filter_options, 0, sizeof(*filter_options)); } void partial_clone_register( const char *remote, - const struct list_objects_filter_options *filter_options) + struct list_objects_filter_options *filter_options) { char *cfg_name; char *filter_name; @@ -168,7 +349,9 @@ void partial_clone_register( * the default for subsequent fetches from this remote. */ filter_name = xstrfmt("remote.%s.partialclonefilter", remote); - git_config_set(filter_name, filter_options->filter_spec); + /* NEEDSWORK: 'expand' result leaking??? */ + git_config_set(filter_name, + expand_list_objects_filter_spec(filter_options)); free(filter_name); /* Make sure the config info are reset */ @@ -180,12 +363,18 @@ void partial_clone_get_default_filter_spec( const char *remote) { struct promisor_remote *promisor = promisor_remote_find(remote); + struct strbuf errbuf = STRBUF_INIT; /* * Parse default value, but silently ignore it if it is invalid. */ - if (promisor) - gently_parse_list_objects_filter(filter_options, - promisor->partial_clone_filter, - NULL); + if (!promisor) + return; + + string_list_append(&filter_options->filter_spec, + promisor->partial_clone_filter); + gently_parse_list_objects_filter(filter_options, + promisor->partial_clone_filter, + &errbuf); + strbuf_release(&errbuf); } diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 8deaa287b5..b63c5ee1a3 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -2,7 +2,7 @@ #define LIST_OBJECTS_FILTER_OPTIONS_H #include "parse-options.h" -#include "strbuf.h" +#include "string-list.h" /* * The list of defined filters for list-objects. @@ -13,6 +13,7 @@ enum list_objects_filter_choice { LOFC_BLOB_LIMIT, LOFC_TREE_DEPTH, LOFC_SPARSE_OID, + LOFC_COMBINE, LOFC__COUNT /* must be last */ }; @@ -23,8 +24,10 @@ struct list_objects_filter_options { * commands that launch filtering sub-processes, or for communication * over the network, don't use this value; use the result of * expand_list_objects_filter_spec() instead. + * To get the raw filter spec given by the user, use the result of + * list_objects_filter_spec(). */ - char *filter_spec; + struct string_list filter_spec; /* * 'choice' is determined by parsing the filter-spec. This indicates @@ -38,19 +41,40 @@ struct list_objects_filter_options { unsigned int no_filter : 1; /* - * Parsed values (fields) from within the filter-spec. These are - * choice-specific; not all values will be defined for any given - * choice. + * BEGIN choice-specific parsed values from within the filter-spec. Only + * some values will be defined for any given choice. */ + struct object_id *sparse_oid_value; unsigned long blob_limit_value; unsigned long tree_exclude_depth; + + /* LOFC_COMBINE values */ + + /* This array contains all the subfilters which this filter combines. */ + size_t sub_nr, sub_alloc; + struct list_objects_filter_options *sub; + + /* + * END choice-specific parsed values. + */ }; /* Normalized command line arguments */ #define CL_ARG__FILTER "filter" -int parse_list_objects_filter( +void list_objects_filter_die_if_populated( + struct list_objects_filter_options *filter_options); + +/* + * Parses the filter spec string given by arg and either (1) simply places the + * result in filter_options if it is not yet populated or (2) combines it with + * the filter already in filter_options if it is already populated. In the case + * of (2), the filter specs are combined as if specified with 'combine:'. + * + * Dies and prints a user-facing message if an error occurs. + */ +void parse_list_objects_filter( struct list_objects_filter_options *filter_options, const char *arg); @@ -65,13 +89,22 @@ int opt_parse_list_objects_filter(const struct option *opt, /* * Translates abbreviated numbers in the filter's filter_spec into their * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024"). + * Returns a string owned by the list_objects_filter_options object. * - * This form should be used instead of the raw filter_spec field when - * communicating with a remote process or subprocess. + * This form should be used instead of the raw list_objects_filter_spec() + * value when communicating with a remote process or subprocess. */ -void expand_list_objects_filter_spec( - const struct list_objects_filter_options *filter, - struct strbuf *expanded_spec); +const char *expand_list_objects_filter_spec( + struct list_objects_filter_options *filter); + +/* + * Returns the filter spec string more or less in the form as the user + * entered it. This form of the filter_spec can be used in user-facing + * messages. Returns a string owned by the list_objects_filter_options + * object. + */ +const char *list_objects_filter_spec( + struct list_objects_filter_options *filter); void list_objects_filter_release( struct list_objects_filter_options *filter_options); @@ -85,7 +118,7 @@ static inline void list_objects_filter_set_no_filter( void partial_clone_register( const char *remote, - const struct list_objects_filter_options *filter_options); + struct list_objects_filter_options *filter_options); void partial_clone_get_default_filter_spec( struct list_objects_filter_options *filter_options, const char *remote); diff --git a/list-objects-filter.c b/list-objects-filter.c index 36e1f774bc..d664264d65 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -26,11 +26,46 @@ */ #define FILTER_SHOWN_BUT_REVISIT (1<<21) -/* - * A filter for list-objects to omit ALL blobs from the traversal. - * And to OPTIONALLY collect a list of the omitted OIDs. - */ -struct filter_blobs_none_data { +struct subfilter { + struct filter *filter; + struct oidset seen; + struct oidset omits; + struct object_id skip_tree; + unsigned is_skipping_tree : 1; +}; + +struct filter { + enum list_objects_filter_result (*filter_object_fn)( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data); + + /* + * Optional. If this function is supplied and the filter needs + * to collect omits, then this function is called once before + * free_fn is called. + * + * This is required because the following two conditions hold: + * + * a. A tree filter can add and remove objects as an object + * graph is traversed. + * b. A combine filter's omit set is the union of all its + * subfilters, which may include tree: filters. + * + * As such, the omits sets must be separate sets, and can only + * be unioned after the traversal is completed. + */ + void (*finalize_omits_fn)(struct oidset *omits, void *filter_data); + + void (*free_fn)(void *filter_data); + + void *filter_data; + + /* If non-NULL, the filter collects a list of the omitted OIDs here. */ struct oidset *omits; }; @@ -40,10 +75,9 @@ static enum list_objects_filter_result filter_blobs_none( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { - struct filter_blobs_none_data *filter_data = filter_data_; - switch (filter_situation) { default: BUG("unknown filter_situation: %d", filter_situation); @@ -61,24 +95,18 @@ static enum list_objects_filter_result filter_blobs_none( assert(obj->type == OBJ_BLOB); assert((obj->flags & SEEN) == 0); - if (filter_data->omits) - oidset_insert(filter_data->omits, &obj->oid); + if (omits) + oidset_insert(omits, &obj->oid); return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ } } -static void *filter_blobs_none__init( - struct oidset *omitted, +static void filter_blobs_none__init( struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { - struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; - - *filter_fn = filter_blobs_none; - *filter_free_fn = free; - return d; + filter->filter_object_fn = filter_blobs_none; + filter->free_fn = free; } /* @@ -86,8 +114,6 @@ static void *filter_blobs_none__init( * Can OPTIONALLY collect a list of the omitted OIDs. */ struct filter_trees_depth_data { - struct oidset *omits; - /* * Maps trees to the minimum depth at which they were seen. It is not * necessary to re-traverse a tree at deeper or equal depths than it has @@ -110,16 +136,16 @@ struct seen_map_entry { /* Returns 1 if the oid was in the omits set before it was invoked. */ static int filter_trees_update_omits( struct object *obj, - struct filter_trees_depth_data *filter_data, + struct oidset *omits, int include_it) { - if (!filter_data->omits) + if (!omits) return 0; if (include_it) - return oidset_remove(filter_data->omits, &obj->oid); + return oidset_remove(omits, &obj->oid); else - return oidset_insert(filter_data->omits, &obj->oid); + return oidset_insert(omits, &obj->oid); } static enum list_objects_filter_result filter_trees_depth( @@ -128,6 +154,7 @@ static enum list_objects_filter_result filter_trees_depth( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { struct filter_trees_depth_data *filter_data = filter_data_; @@ -152,7 +179,7 @@ static enum list_objects_filter_result filter_trees_depth( return LOFR_ZERO; case LOFS_BLOB: - filter_trees_update_omits(obj, filter_data, include_it); + filter_trees_update_omits(obj, omits, include_it); return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO; case LOFS_BEGIN_TREE: @@ -173,12 +200,12 @@ static enum list_objects_filter_result filter_trees_depth( filter_res = LOFR_SKIP_TREE; } else { int been_omitted = filter_trees_update_omits( - obj, filter_data, include_it); + obj, omits, include_it); seen_info->depth = filter_data->current_depth; if (include_it) filter_res = LOFR_DO_SHOW; - else if (filter_data->omits && !been_omitted) + else if (omits && !been_omitted) /* * Must update omit information of children * recursively; they have not been omitted yet. @@ -201,21 +228,18 @@ static void filter_trees_free(void *filter_data) { free(d); } -static void *filter_trees_depth__init( - struct oidset *omitted, +static void filter_trees_depth__init( struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; oidmap_init(&d->seen_at_depth, 0); d->exclude_depth = filter_options->tree_exclude_depth; d->current_depth = 0; - *filter_fn = filter_trees_depth; - *filter_free_fn = filter_trees_free; - return d; + filter->filter_data = d; + filter->filter_object_fn = filter_trees_depth; + filter->free_fn = filter_trees_free; } /* @@ -223,7 +247,6 @@ static void *filter_trees_depth__init( * And to OPTIONALLY collect a list of the omitted OIDs. */ struct filter_blobs_limit_data { - struct oidset *omits; unsigned long max_bytes; }; @@ -233,6 +256,7 @@ static enum list_objects_filter_result filter_blobs_limit( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { struct filter_blobs_limit_data *filter_data = filter_data_; @@ -270,30 +294,27 @@ static enum list_objects_filter_result filter_blobs_limit( if (object_length < filter_data->max_bytes) goto include_it; - if (filter_data->omits) - oidset_insert(filter_data->omits, &obj->oid); + if (omits) + oidset_insert(omits, &obj->oid); return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ } include_it: - if (filter_data->omits) - oidset_remove(filter_data->omits, &obj->oid); + if (omits) + oidset_remove(omits, &obj->oid); return LOFR_MARK_SEEN | LOFR_DO_SHOW; } -static void *filter_blobs_limit__init( - struct oidset *omitted, +static void filter_blobs_limit__init( struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; d->max_bytes = filter_options->blob_limit_value; - *filter_fn = filter_blobs_limit; - *filter_free_fn = free; - return d; + filter->filter_data = d; + filter->filter_object_fn = filter_blobs_limit; + filter->free_fn = free; } /* @@ -326,7 +347,6 @@ struct frame { }; struct filter_sparse_data { - struct oidset *omits; struct exclude_list el; size_t nr, alloc; @@ -339,6 +359,7 @@ static enum list_objects_filter_result filter_sparse( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { struct filter_sparse_data *filter_data = filter_data_; @@ -420,8 +441,8 @@ static enum list_objects_filter_result filter_sparse( if (val < 0) val = frame->defval; if (val > 0) { - if (filter_data->omits) - oidset_remove(filter_data->omits, &obj->oid); + if (omits) + oidset_remove(omits, &obj->oid); return LOFR_MARK_SEEN | LOFR_DO_SHOW; } @@ -435,8 +456,8 @@ static enum list_objects_filter_result filter_sparse( * Leave the LOFR_ bits unset so that if the blob appears * again in the traversal, we will be asked again. */ - if (filter_data->omits) - oidset_insert(filter_data->omits, &obj->oid); + if (omits) + oidset_insert(omits, &obj->oid); /* * Remember that at least 1 blob in this tree was @@ -456,14 +477,11 @@ static void filter_sparse_free(void *filter_data) free(d); } -static void *filter_sparse_oid__init( - struct oidset *omitted, +static void filter_sparse_oid__init( struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { struct filter_sparse_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value, NULL, 0, &d->el) < 0) die("could not load filter specification"); @@ -473,16 +491,147 @@ static void *filter_sparse_oid__init( d->array_frame[d->nr].child_prov_omit = 0; d->nr++; - *filter_fn = filter_sparse; - *filter_free_fn = filter_sparse_free; - return d; + filter->filter_data = d; + filter->filter_object_fn = filter_sparse; + filter->free_fn = filter_sparse_free; } -typedef void *(*filter_init_fn)( - struct oidset *omitted, +/* A filter which only shows objects shown by all sub-filters. */ +struct combine_filter_data { + struct subfilter *sub; + size_t nr; +}; + +static enum list_objects_filter_result process_subfilter( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct subfilter *sub) +{ + enum list_objects_filter_result result; + + /* + * Check and update is_skipping_tree before oidset_contains so + * that is_skipping_tree gets unset even when the object is + * marked as seen. As of this writing, no filter uses + * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the + * ordering is only theoretically important. Be cautious if you + * change the order of the below checks and more filters have + * been added! + */ + if (sub->is_skipping_tree) { + if (filter_situation == LOFS_END_TREE && + oideq(&obj->oid, &sub->skip_tree)) + sub->is_skipping_tree = 0; + else + return LOFR_ZERO; + } + if (oidset_contains(&sub->seen, &obj->oid)) + return LOFR_ZERO; + + result = list_objects_filter__filter_object( + r, filter_situation, obj, pathname, filename, sub->filter); + + if (result & LOFR_MARK_SEEN) + oidset_insert(&sub->seen, &obj->oid); + + if (result & LOFR_SKIP_TREE) { + sub->is_skipping_tree = 1; + sub->skip_tree = obj->oid; + } + + return result; +} + +static enum list_objects_filter_result filter_combine( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data) +{ + struct combine_filter_data *d = filter_data; + enum list_objects_filter_result combined_result = + LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE; + size_t sub; + + for (sub = 0; sub < d->nr; sub++) { + enum list_objects_filter_result sub_result = process_subfilter( + r, filter_situation, obj, pathname, filename, + &d->sub[sub]); + if (!(sub_result & LOFR_DO_SHOW)) + combined_result &= ~LOFR_DO_SHOW; + if (!(sub_result & LOFR_MARK_SEEN)) + combined_result &= ~LOFR_MARK_SEEN; + if (!d->sub[sub].is_skipping_tree) + combined_result &= ~LOFR_SKIP_TREE; + } + + return combined_result; +} + +static void filter_combine__free(void *filter_data) +{ + struct combine_filter_data *d = filter_data; + size_t sub; + for (sub = 0; sub < d->nr; sub++) { + list_objects_filter__free(d->sub[sub].filter); + oidset_clear(&d->sub[sub].seen); + if (d->sub[sub].omits.set.size) + BUG("expected oidset to be cleared already"); + } + free(d->sub); +} + +static void add_all(struct oidset *dest, struct oidset *src) { + struct oidset_iter iter; + struct object_id *src_oid; + + oidset_iter_init(src, &iter); + while ((src_oid = oidset_iter_next(&iter)) != NULL) + oidset_insert(dest, src_oid); +} + +static void filter_combine__finalize_omits( + struct oidset *omits, + void *filter_data) +{ + struct combine_filter_data *d = filter_data; + size_t sub; + + for (sub = 0; sub < d->nr; sub++) { + add_all(omits, &d->sub[sub].omits); + oidset_clear(&d->sub[sub].omits); + } +} + +static void filter_combine__init( struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn); + struct filter* filter) +{ + struct combine_filter_data *d = xcalloc(1, sizeof(*d)); + size_t sub; + + d->nr = filter_options->sub_nr; + d->sub = xcalloc(d->nr, sizeof(*d->sub)); + for (sub = 0; sub < d->nr; sub++) + d->sub[sub].filter = list_objects_filter__init( + filter->omits ? &d->sub[sub].omits : NULL, + &filter_options->sub[sub]); + + filter->filter_data = d; + filter->filter_object_fn = filter_combine; + filter->free_fn = filter_combine__free; + filter->finalize_omits_fn = filter_combine__finalize_omits; +} + +typedef void (*filter_init_fn)( + struct list_objects_filter_options *filter_options, + struct filter *filter); /* * Must match "enum list_objects_filter_choice". @@ -493,14 +642,14 @@ static filter_init_fn s_filters[] = { filter_blobs_limit__init, filter_trees_depth__init, filter_sparse_oid__init, + filter_combine__init, }; -void *list_objects_filter__init( +struct filter *list_objects_filter__init( struct oidset *omitted, - struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct list_objects_filter_options *filter_options) { + struct filter *filter; filter_init_fn init_fn; assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT); @@ -510,10 +659,44 @@ void *list_objects_filter__init( filter_options->choice); init_fn = s_filters[filter_options->choice]; - if (init_fn) - return init_fn(omitted, filter_options, - filter_fn, filter_free_fn); - *filter_fn = NULL; - *filter_free_fn = NULL; - return NULL; + if (!init_fn) + return NULL; + + filter = xcalloc(1, sizeof(*filter)); + filter->omits = omitted; + init_fn(filter_options, filter); + return filter; +} + +enum list_objects_filter_result list_objects_filter__filter_object( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct filter *filter) +{ + if (filter && (obj->flags & NOT_USER_GIVEN)) + return filter->filter_object_fn(r, filter_situation, obj, + pathname, filename, + filter->omits, + filter->filter_data); + /* + * No filter is active or user gave object explicitly. In this case, + * always show the object (except when LOFS_END_TREE, since this tree + * had already been shown when LOFS_BEGIN_TREE). + */ + if (filter_situation == LOFS_END_TREE) + return 0; + return LOFR_MARK_SEEN | LOFR_DO_SHOW; +} + +void list_objects_filter__free(struct filter *filter) +{ + if (!filter) + return; + if (filter->finalize_omits_fn && filter->omits) + filter->finalize_omits_fn(filter->omits, filter->filter_data); + filter->free_fn(filter->filter_data); + free(filter); } diff --git a/list-objects-filter.h b/list-objects-filter.h index 1d45a4ad57..cfd784e203 100644 --- a/list-objects-filter.h +++ b/list-objects-filter.h @@ -60,30 +60,36 @@ enum list_objects_filter_situation { LOFS_BLOB }; -typedef enum list_objects_filter_result (*filter_object_fn)( +struct filter; + +/* + * Constructor for the set of defined list-objects filters. + * The `omitted` set is optional. It is populated with objects that the + * filter excludes. This set should not be considered finalized until + * after list_objects_filter__free is called on the returned `struct + * filter *`. + */ +struct filter *list_objects_filter__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options); + +/* + * Lets `filter` decide how to handle the `obj`. If `filter` is NULL, this + * function behaves as expected if no filter is configured: all objects are + * included. + */ +enum list_objects_filter_result list_objects_filter__filter_object( struct repository *r, enum list_objects_filter_situation filter_situation, struct object *obj, const char *pathname, const char *filename, - void *filter_data); - -typedef void (*filter_free_fn)(void *filter_data); + struct filter *filter); /* - * Constructor for the set of defined list-objects filters. - * Returns a generic "void *filter_data". - * - * The returned "filter_fn" will be used by traverse_commit_list() - * to filter the results. - * - * The returned "filter_free_fn" is a destructor for the - * filter_data. + * Destroys `filter` and finalizes the `omitted` set, if present. Does + * nothing if `filter` is null. */ -void *list_objects_filter__init( - struct oidset *omitted, - struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn); +void list_objects_filter__free(struct filter *filter); #endif /* LIST_OBJECTS_FILTER_H */ diff --git a/list-objects.c b/list-objects.c index b5651ddd5b..9307d91fb3 100644 --- a/list-objects.c +++ b/list-objects.c @@ -18,8 +18,7 @@ struct traversal_context { show_object_fn show_object; show_commit_fn show_commit; void *show_data; - filter_object_fn filter_fn; - void *filter_data; + struct filter *filter; }; static void process_blob(struct traversal_context *ctx, @@ -29,7 +28,7 @@ static void process_blob(struct traversal_context *ctx, { struct object *obj = &blob->object; size_t pathlen; - enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; + enum list_objects_filter_result r; if (!ctx->revs->blob_objects) return; @@ -54,11 +53,10 @@ static void process_blob(struct traversal_context *ctx, pathlen = path->len; strbuf_addstr(path, name); - if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) - r = ctx->filter_fn(ctx->revs->repo, - LOFS_BLOB, obj, - path->buf, &path->buf[pathlen], - ctx->filter_data); + r = list_objects_filter__filter_object(ctx->revs->repo, + LOFS_BLOB, obj, + path->buf, &path->buf[pathlen], + ctx->filter); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) @@ -157,7 +155,7 @@ static void process_tree(struct traversal_context *ctx, struct object *obj = &tree->object; struct rev_info *revs = ctx->revs; int baselen = base->len; - enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; + enum list_objects_filter_result r; int failed_parse; if (!revs->tree_objects) @@ -186,11 +184,10 @@ static void process_tree(struct traversal_context *ctx, } strbuf_addstr(base, name); - if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) - r = ctx->filter_fn(ctx->revs->repo, - LOFS_BEGIN_TREE, obj, - base->buf, &base->buf[baselen], - ctx->filter_data); + r = list_objects_filter__filter_object(ctx->revs->repo, + LOFS_BEGIN_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) @@ -203,16 +200,14 @@ static void process_tree(struct traversal_context *ctx, else if (!failed_parse) process_tree_contents(ctx, tree, base); - if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) { - r = ctx->filter_fn(ctx->revs->repo, - LOFS_END_TREE, obj, - base->buf, &base->buf[baselen], - ctx->filter_data); - if (r & LOFR_MARK_SEEN) - obj->flags |= SEEN; - if (r & LOFR_DO_SHOW) - ctx->show_object(obj, base->buf, ctx->show_data); - } + r = list_objects_filter__filter_object(ctx->revs->repo, + LOFS_END_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_DO_SHOW) + ctx->show_object(obj, base->buf, ctx->show_data); strbuf_setlen(base, baselen); free_tree_buffer(tree); @@ -402,8 +397,7 @@ void traverse_commit_list(struct rev_info *revs, ctx.show_commit = show_commit; ctx.show_object = show_object; ctx.show_data = show_data; - ctx.filter_fn = NULL; - ctx.filter_data = NULL; + ctx.filter = NULL; do_traverse(&ctx); } @@ -416,17 +410,12 @@ void traverse_commit_list_filtered( struct oidset *omitted) { struct traversal_context ctx; - filter_free_fn filter_free_fn = NULL; ctx.revs = revs; ctx.show_object = show_object; ctx.show_commit = show_commit; ctx.show_data = show_data; - ctx.filter_fn = NULL; - - ctx.filter_data = list_objects_filter__init(omitted, filter_options, - &ctx.filter_fn, &filter_free_fn); + ctx.filter = list_objects_filter__init(omitted, filter_options); do_traverse(&ctx); - if (ctx.filter_data && filter_free_fn) - filter_free_fn(ctx.filter_data); + list_objects_filter__free(ctx.filter); } @@ -774,8 +774,10 @@ void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s) } } -static int is_rfc3986_reserved(char ch) +int is_rfc3986_reserved_or_unreserved(char ch) { + if (is_rfc3986_unreserved(ch)) + return 1; switch (ch) { case '!': case '*': case '\'': case '(': case ')': case ';': case ':': case '@': case '&': case '=': case '+': case '$': @@ -785,20 +787,19 @@ static int is_rfc3986_reserved(char ch) return 0; } -static int is_rfc3986_unreserved(char ch) +int is_rfc3986_unreserved(char ch) { return isalnum(ch) || ch == '-' || ch == '_' || ch == '.' || ch == '~'; } static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len, - int reserved) + char_predicate allow_unencoded_fn) { strbuf_grow(sb, len); while (len--) { char ch = *s++; - if (is_rfc3986_unreserved(ch) || - (!reserved && is_rfc3986_reserved(ch))) + if (allow_unencoded_fn(ch)) strbuf_addch(sb, ch); else strbuf_addf(sb, "%%%02x", (unsigned char)ch); @@ -806,9 +807,9 @@ static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len, } void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, - int reserved) + char_predicate allow_unencoded_fn) { - strbuf_add_urlencode(sb, s, strlen(s), reserved); + strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn); } static void strbuf_humanise(struct strbuf *buf, off_t bytes, @@ -672,8 +672,13 @@ void strbuf_branchname(struct strbuf *sb, const char *name, */ int strbuf_check_branch_ref(struct strbuf *sb, const char *name); +typedef int (*char_predicate)(char ch); + +int is_rfc3986_unreserved(char ch); +int is_rfc3986_reserved_or_unreserved(char ch); + void strbuf_addstr_urlencode(struct strbuf *sb, const char *name, - int reserved); + char_predicate allow_unencoded_fn); __attribute__((format (printf,1,2))) int printf_ln(const char *fmt, ...); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 73cd95812f..fc634a56b2 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -208,6 +208,25 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr test_cmp unique_types.expected unique_types.observed ' +test_expect_success 'implicitly construct combine: filter with repeated flags' ' + GIT_TRACE=$(pwd)/trace git clone --bare \ + --filter=blob:none --filter=tree:1 \ + "file://$(pwd)/srv.bare" pc2 && + grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \ + trace && + git -C pc2 rev-list --objects --missing=allow-any HEAD >objects && + + # We should have gotten some root trees. + grep " $" objects && + # Should not have gotten any non-root trees or blobs. + ! grep " ." objects && + + xargs -n 1 git -C pc2 cat-file -t <objects >types && + sort -u types >unique_types.actual && + test_write_lines commit tree >unique_types.expected && + test_cmp unique_types.expected unique_types.actual +' + test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' ' rm -rf src dst && git init src && diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index acd7f5ab80..de0e5a5d36 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -278,7 +278,19 @@ test_expect_success 'verify skipping tree iteration when not collecting omits' ' test_line_count = 2 actual && # Make sure no other trees were considered besides the root. - ! grep "Skipping contents of tree [^.]" filter_trace + ! grep "Skipping contents of tree [^.]" filter_trace && + + # Try this again with "combine:". If both sub-filters are skipping + # trees, the composite filter should also skip trees. This is not + # important unless the user does combine:tree:X+tree:Y or another filter + # besides "tree:" is implemented in the future which can skip trees. + GIT_TRACE=1 git -C r3 rev-list \ + --objects --filter=combine:tree:1+tree:3 HEAD 2>filter_trace && + + # Only skip the dir1/ tree, which is shared between the two commits. + grep "Skipping contents of tree " filter_trace >actual && + test_write_lines "Skipping contents of tree dir1/..." >expected && + test_cmp expected actual ' # Test tree:# filters. @@ -330,6 +342,148 @@ test_expect_success 'verify tree:3 includes everything expected' ' test_line_count = 10 actual ' +test_expect_success 'combine:... for a simple combination' ' + git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \ + >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD dir1 && + + # There are also 2 commit objects + test_line_count = 5 actual && + + cp actual expected && + + # Try again using repeated --filter - this is equivalent to a manual + # combine with "combine:...+..." + git -C r3 rev-list --objects --filter=combine:tree:2 \ + --filter=blob:none HEAD >actual && + + test_cmp expected actual +' + +test_expect_success 'combine:... with URL encoding' ' + git -C r3 rev-list --objects \ + --filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD dir1 && + + # There are also 2 commit objects + test_line_count = 5 actual +' + +expect_invalid_filter_spec () { + spec="$1" && + err="$2" && + + test_must_fail git -C r3 rev-list --objects --filter="$spec" HEAD \ + >actual 2>actual_stderr && + test_must_be_empty actual && + test_i18ngrep "$err" actual_stderr +} + +test_expect_success 'combine:... while URL-encoding things that should not be' ' + expect_invalid_filter_spec combine%3Atree:2+blob:none \ + "invalid filter-spec" +' + +test_expect_success 'combine: with nothing after the :' ' + expect_invalid_filter_spec combine: "expected something after combine:" +' + +test_expect_success 'parse error in first sub-filter in combine:' ' + expect_invalid_filter_spec combine:tree:asdf+blob:none \ + "expected .tree:<depth>." +' + +test_expect_success 'combine:... with non-encoded reserved chars' ' + expect_invalid_filter_spec combine:tree:2+sparse:@xyz \ + "must escape char in sub-filter-spec: .@." && + expect_invalid_filter_spec combine:tree:2+sparse:\` \ + "must escape char in sub-filter-spec: .\`." && + expect_invalid_filter_spec combine:tree:2+sparse:~abc \ + "must escape char in sub-filter-spec: .\~." +' + +test_expect_success 'validate err msg for "combine:<valid-filter>+"' ' + expect_invalid_filter_spec combine:tree:2+ "expected .tree:<depth>." +' + +test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' ' + git -C r3 rev-list --objects --filter="combine:tree:2+bl%6Fb:n%6fne" \ + HEAD >actual && + test_line_count = 5 actual && + git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \ + HEAD >actual && + test_line_count = 5 actual && + git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual && + test_line_count = 2 actual && + git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \ + HEAD >actual && + test_line_count = 5 actual +' + +test_expect_success 'add sparse pattern blobs whose paths have reserved chars' ' + cp r3/pattern r3/pattern1+renamed% && + cp r3/pattern "r3/p;at%ter+n" && + cp r3/pattern r3/^~pattern && + git -C r3 add pattern1+renamed% "p;at%ter+n" ^~pattern && + git -C r3 commit -m "add sparse pattern files with reserved chars" +' + +test_expect_success 'combine:... with more than two sub-filters' ' + git -C r3 rev-list --objects \ + --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \ + HEAD >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD~2 "" && + expect_has HEAD dir1 && + expect_has HEAD dir1/sparse1 && + expect_has HEAD dir1/sparse2 && + + # Should also have 3 commits + test_line_count = 9 actual && + + # Try again, this time making sure the last sub-filter is only + # URL-decoded once. + cp actual expect && + + git -C r3 rev-list --objects \ + --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \ + HEAD >actual && + test_cmp expect actual && + + # Use the same composite filter again, but with a pattern file name that + # requires encoding multiple characters, and use implicit filter + # combining. + test_when_finished "rm -f trace1" && + GIT_TRACE=$(pwd)/trace1 git -C r3 rev-list --objects \ + --filter=tree:3 --filter=blob:limit=40 \ + --filter=sparse:oid="master:p;at%ter+n" \ + HEAD >actual && + + test_cmp expect actual && + grep "Add to combine filter-spec: sparse:oid=master:p%3bat%25ter%2bn" \ + trace1 && + + # Repeat the above test, but this time, the characters to encode are in + # the LHS of the combined filter. + test_when_finished "rm -f trace2" && + GIT_TRACE=$(pwd)/trace2 git -C r3 rev-list --objects \ + --filter=sparse:oid=master:^~pattern \ + --filter=tree:3 --filter=blob:limit=40 \ + HEAD >actual && + + test_cmp expect actual && + grep "Add to combine filter-spec: sparse:oid=master:%5e%7epattern" \ + trace2 +' + # Test provisional omit collection logic with a repo that has objects appearing # at multiple depths - first deeper than the filter's threshold, then shallow. @@ -373,6 +527,37 @@ test_expect_success 'verify skipping tree iteration when collecting omits' ' test_cmp expect actual ' +test_expect_success 'setup r5' ' + git init r5 && + mkdir -p r5/subdir && + + echo 1 >r5/short-root && + echo 12345 >r5/long-root && + echo a >r5/subdir/short-subdir && + echo abcde >r5/subdir/long-subdir && + + git -C r5 add short-root long-root subdir && + git -C r5 commit -m "commit msg" +' + +test_expect_success 'verify collecting omits in combined: filter' ' + # Note that this test guards against the naive implementation of simply + # giving both filters the same "omits" set and expecting it to + # automatically merge them. + git -C r5 rev-list --objects --quiet --filter-print-omitted \ + --filter=combine:tree:2+blob:limit=3 HEAD >actual && + + # Expect 0 trees/commits, 3 blobs omitted (all blobs except short-root) + omitted_1=$(echo 12345 | git hash-object --stdin) && + omitted_2=$(echo a | git hash-object --stdin) && + omitted_3=$(echo abcde | git hash-object --stdin) && + + grep ~$omitted_1 actual && + grep ~$omitted_2 actual && + grep ~$omitted_3 actual && + test_line_count = 3 actual +' + # Test tree:<depth> where a tree is iterated to twice - once where a subentry is # too deep to be included, and again where the blob inside it is shallow enough # to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we @@ -441,11 +626,4 @@ test_expect_success 'expand blob limit in protocol' ' grep "blob:limit=1024" trace ' -test_expect_success 'expand tree depth limit in protocol' ' - GIT_TRACE_PACKET="$(pwd)/tree_trace" git -c protocol.version=2 clone \ - --filter=tree:0k "file://$(pwd)/r2" tree && - ! grep "tree:0k" tree_trace && - grep "tree:0" tree_trace -' - test_done diff --git a/transport-helper.c b/transport-helper.c index 6b05a88faf..3169111772 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -682,13 +682,9 @@ static int fetch(struct transport *transport, set_helper_option(transport, "update-shallow", "true"); if (data->transport_options.filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec( - &data->transport_options.filter_options, - &expanded_filter_spec); - set_helper_option(transport, "filter", - expanded_filter_spec.buf); - strbuf_release(&expanded_filter_spec); + const char *spec = expand_list_objects_filter_spec( + &data->transport_options.filter_options); + set_helper_option(transport, "filter", spec); } if (data->transport_options.negotiation_tips) diff --git a/transport.c b/transport.c index 778c60bf57..6324cfc482 100644 --- a/transport.c +++ b/transport.c @@ -224,6 +224,7 @@ static int set_git_option(struct git_transport_options *opts, opts->no_dependents = !!value; return 0; } else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) { + list_objects_filter_die_if_populated(&opts->filter_options); parse_list_objects_filter(&opts->filter_options, value); return 0; } diff --git a/upload-pack.c b/upload-pack.c index 222cd3ad89..875db92996 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -140,18 +140,17 @@ static void create_pack_file(const struct object_array *have_obj, argv_array_push(&pack_objects.args, "--delta-base-offset"); if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); - if (filter_options.filter_spec) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&filter_options, - &expanded_filter_spec); + if (filter_options.choice) { + const char *spec = + expand_list_objects_filter_spec(&filter_options); if (pack_objects.use_shell) { struct strbuf buf = STRBUF_INIT; - sq_quote_buf(&buf, expanded_filter_spec.buf); + sq_quote_buf(&buf, spec); argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf); strbuf_release(&buf); } else { argv_array_pushf(&pack_objects.args, "--filter=%s", - expanded_filter_spec.buf); + spec); } } @@ -884,6 +883,7 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan if (skip_prefix(reader->line, "filter ", &arg)) { if (!filter_capability_requested) die("git upload-pack: filtering capability not negotiated"); + list_objects_filter_die_if_populated(&filter_options); parse_list_objects_filter(&filter_options, arg); continue; } @@ -1305,6 +1305,7 @@ static void process_args(struct packet_reader *request, } if (allow_filter && skip_prefix(arg, "filter ", &p)) { + list_objects_filter_die_if_populated(&filter_options); parse_list_objects_filter(&filter_options, p); continue; } @@ -86,6 +86,12 @@ char *url_decode_mem(const char *url, int len) return url_decode_internal(&url, len, NULL, &out, 0); } +char *url_percent_decode(const char *encoded) +{ + struct strbuf out = STRBUF_INIT; + return url_decode_internal(&encoded, strlen(encoded), NULL, &out, 0); +} + char *url_decode_parameter_name(const char **query) { struct strbuf out = STRBUF_INIT; @@ -7,6 +7,14 @@ int is_url(const char *url); int is_urlschemechar(int first_flag, int ch); char *url_decode(const char *url); char *url_decode_mem(const char *url, int len); + +/* + * Similar to the url_decode_{,mem} methods above, but doesn't assume there + * is a scheme followed by a : at the start of the string. Instead, %-sequences + * before any : are also parsed. + */ +char *url_percent_decode(const char *encoded); + char *url_decode_parameter_name(const char **query); char *url_decode_parameter_value(const char **query); |