diff options
author | Junio C Hamano <gitster@pobox.com> | 2019-02-05 14:26:09 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2019-02-05 14:26:10 -0800 |
commit | 073312b4c7a15d484d2ffdc2eb4a406df9cf9c55 (patch) | |
tree | 473d2508e4ba7d7e424445d2ddce2a9cf659ea46 | |
parent | Merge branch 'sb/more-repo-in-api' (diff) | |
parent | filter-options: expand scaled numbers (diff) | |
download | tgif-073312b4c7a15d484d2ffdc2eb4a406df9cf9c55.tar.xz |
Merge branch 'js/filter-options-should-use-plain-int'
Update the protocol message specification to allow only the limited
use of scaled quantities. This is ensure potential compatibility
issues will not go out of hand.
* js/filter-options-should-use-plain-int:
filter-options: expand scaled numbers
tree:<depth>: skip some trees even when collecting omits
list-objects-filter: teach tree:# how to handle >0
-rw-r--r-- | Documentation/rev-list-options.txt | 9 | ||||
-rw-r--r-- | Documentation/technical/protocol-v2.txt | 8 | ||||
-rw-r--r-- | builtin/clone.c | 6 | ||||
-rw-r--r-- | builtin/fetch.c | 7 | ||||
-rw-r--r-- | fetch-pack.c | 15 | ||||
-rw-r--r-- | list-objects-filter-options.c | 27 | ||||
-rw-r--r-- | list-objects-filter-options.h | 20 | ||||
-rw-r--r-- | list-objects-filter.c | 122 | ||||
-rwxr-xr-x | t/t6112-rev-list-filters-objects.sh | 139 | ||||
-rw-r--r-- | transport-helper.c | 13 | ||||
-rw-r--r-- | upload-pack.c | 7 |
11 files changed, 329 insertions, 44 deletions
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 98b538bc77..91b3a72bda 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -730,8 +730,13 @@ specification contained in <path>. + The form '--filter=tree:<depth>' omits all blobs and trees whose depth from the root tree is >= <depth> (minimum depth if an object is located -at multiple depths in the commits traversed). Currently, only <depth>=0 -is supported, which omits all blobs and trees. +at multiple depths in the commits traversed). <depth>=0 will not include +any trees or blobs unless included explicitly in the command-line (or +standard input when --stdin is used). <depth>=1 will include only the +tree and blobs which are referenced directly by a commit reachable from +<commit> or an explicitly-given object. <depth>=2 is like <depth>=1 +while also including trees and blobs one more level removed from an +explicitly-given commit or tree. --no-filter:: Turn off any previous `--filter=` argument. diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 09e4e0273f..292060a9dc 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -296,7 +296,13 @@ included in the client's request: Request that various objects from the packfile be omitted using one of several filtering techniques. These are intended for use with partial clone and partial fetch operations. See - `rev-list` for possible "filter-spec" values. + `rev-list` for possible "filter-spec" values. When communicating + with other processes, senders SHOULD translate scaled integers + (e.g. "1k") into a fully-expanded form (e.g. "1024") to aid + interoperability with older receivers that may not understand + newly-invented scaling suffixes. However, receivers SHOULD + accept the following suffixes: 'k', 'm', and 'g' for 1024, + 1048576, and 1073741824, respectively. If the 'ref-in-want' feature is advertised, the following argument can be included in the client's request as well as the potential addition of diff --git a/builtin/clone.c b/builtin/clone.c index 7c7f98c72c..739de68820 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1136,9 +1136,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix) option_upload_pack); if (filter_options.choice) { + struct strbuf expanded_filter_spec = STRBUF_INIT; + expand_list_objects_filter_spec(&filter_options, + &expanded_filter_spec); transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, - filter_options.filter_spec); + expanded_filter_spec.buf); transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + strbuf_release(&expanded_filter_spec); } if (transport->smart_options && !deepen && !filter_options.choice) diff --git a/builtin/fetch.c b/builtin/fetch.c index c316c03ba2..61a443031d 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1165,6 +1165,7 @@ static void add_negotiation_tips(struct git_transport_options *smart_options) static struct transport *prepare_transport(struct remote *remote, int deepen) { struct transport *transport; + transport = transport_get(remote, NULL); transport_set_verbosity(transport, verbosity, progress); transport->family = family; @@ -1184,9 +1185,13 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); if (filter_options.choice) { + struct strbuf expanded_filter_spec = STRBUF_INIT; + expand_list_objects_filter_spec(&filter_options, + &expanded_filter_spec); set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, - filter_options.filter_spec); + expanded_filter_spec.buf); set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + strbuf_release(&expanded_filter_spec); } if (negotiation_tip.nr) { if (transport->smart_options) diff --git a/fetch-pack.c b/fetch-pack.c index 577faa6229..bf8984b8e8 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -329,9 +329,14 @@ static int find_common(struct fetch_negotiator *negotiator, packet_buf_write(&req_buf, "deepen-not %s", s->string); } } - if (server_supports_filtering && args->filter_options.choice) + if (server_supports_filtering && args->filter_options.choice) { + struct strbuf expanded_filter_spec = STRBUF_INIT; + expand_list_objects_filter_spec(&args->filter_options, + &expanded_filter_spec); packet_buf_write(&req_buf, "filter %s", - args->filter_options.filter_spec); + expanded_filter_spec.buf); + strbuf_release(&expanded_filter_spec); + } packet_buf_flush(&req_buf); state_len = req_buf.len; @@ -1122,9 +1127,13 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, /* Add filter */ if (server_supports_feature("fetch", "filter", 0) && args->filter_options.choice) { + struct strbuf expanded_filter_spec = STRBUF_INIT; print_verbose(args, _("Server supports filter")); + expand_list_objects_filter_spec(&args->filter_options, + &expanded_filter_spec); packet_buf_write(&req_buf, "filter %s", - args->filter_options.filter_spec); + expanded_filter_spec.buf); + strbuf_release(&expanded_filter_spec); } else if (args->filter_options.choice) { warning("filtering not recognized by server, ignoring"); } diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index e8da2e8581..9efb3e9902 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -18,8 +18,9 @@ * See Documentation/rev-list-options.txt for allowed values for <arg>. * * Capture the given arg as the "filter_spec". This can be forwarded to - * subordinate commands when necessary. We also "intern" the arg for - * the convenience of the current command. + * subordinate commands when necessary (although it's better to pass it through + * expand_list_objects_filter_spec() first). We also "intern" the arg for the + * convenience of the current command. */ static int gently_parse_list_objects_filter( struct list_objects_filter_options *filter_options, @@ -50,16 +51,15 @@ static int gently_parse_list_objects_filter( } } else if (skip_prefix(arg, "tree:", &v0)) { - unsigned long depth; - if (!git_parse_ulong(v0, &depth) || depth != 0) { + if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) { if (errbuf) { strbuf_addstr( errbuf, - _("only 'tree:0' is supported")); + _("expected 'tree:<depth>'")); } return 1; } - filter_options->choice = LOFC_TREE_NONE; + filter_options->choice = LOFC_TREE_DEPTH; return 0; } else if (skip_prefix(arg, "sparse:oid=", &v0)) { @@ -112,6 +112,21 @@ int opt_parse_list_objects_filter(const struct option *opt, return parse_list_objects_filter(filter_options, arg); } +void expand_list_objects_filter_spec( + const struct list_objects_filter_options *filter, + struct strbuf *expanded_spec) +{ + strbuf_init(expanded_spec, strlen(filter->filter_spec)); + if (filter->choice == LOFC_BLOB_LIMIT) + strbuf_addf(expanded_spec, "blob:limit=%lu", + filter->blob_limit_value); + else if (filter->choice == LOFC_TREE_DEPTH) + strbuf_addf(expanded_spec, "tree:%lu", + filter->tree_exclude_depth); + else + strbuf_addstr(expanded_spec, filter->filter_spec); +} + void list_objects_filter_release( struct list_objects_filter_options *filter_options) { diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index af64e5c66f..e3adc78ebf 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -2,6 +2,7 @@ #define LIST_OBJECTS_FILTER_OPTIONS_H #include "parse-options.h" +#include "strbuf.h" /* * The list of defined filters for list-objects. @@ -10,7 +11,7 @@ enum list_objects_filter_choice { LOFC_DISABLED = 0, LOFC_BLOB_NONE, LOFC_BLOB_LIMIT, - LOFC_TREE_NONE, + LOFC_TREE_DEPTH, LOFC_SPARSE_OID, LOFC_SPARSE_PATH, LOFC__COUNT /* must be last */ @@ -20,8 +21,9 @@ struct list_objects_filter_options { /* * 'filter_spec' is the raw argument value given on the command line * or protocol request. (The part after the "--keyword=".) For - * commands that launch filtering sub-processes, this value should be - * passed to them as received by the current process. + * commands that launch filtering sub-processes, or for communication + * over the network, don't use this value; use the result of + * expand_list_objects_filter_spec() instead. */ char *filter_spec; @@ -44,6 +46,7 @@ struct list_objects_filter_options { struct object_id *sparse_oid_value; char *sparse_path_value; unsigned long blob_limit_value; + unsigned long tree_exclude_depth; }; /* Normalized command line arguments */ @@ -61,6 +64,17 @@ int opt_parse_list_objects_filter(const struct option *opt, N_("object filtering"), 0, \ opt_parse_list_objects_filter } +/* + * Translates abbreviated numbers in the filter's filter_spec into their + * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024"). + * + * This form should be used instead of the raw filter_spec field when + * communicating with a remote process or subprocess. + */ +void expand_list_objects_filter_spec( + const struct list_objects_filter_options *filter, + struct strbuf *expanded_spec); + void list_objects_filter_release( struct list_objects_filter_options *filter_options); diff --git a/list-objects-filter.c b/list-objects-filter.c index a62624a1ce..ee449de3f7 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -10,6 +10,7 @@ #include "list-objects.h" #include "list-objects-filter.h" #include "list-objects-filter-options.h" +#include "oidmap.h" #include "oidset.h" #include "object-store.h" @@ -84,11 +85,44 @@ static void *filter_blobs_none__init( * A filter for list-objects to omit ALL trees and blobs from the traversal. * Can OPTIONALLY collect a list of the omitted OIDs. */ -struct filter_trees_none_data { +struct filter_trees_depth_data { struct oidset *omits; + + /* + * Maps trees to the minimum depth at which they were seen. It is not + * necessary to re-traverse a tree at deeper or equal depths than it has + * already been traversed. + * + * We can't use LOFR_MARK_SEEN for tree objects since this will prevent + * it from being traversed at shallower depths. + */ + struct oidmap seen_at_depth; + + unsigned long exclude_depth; + unsigned long current_depth; }; -static enum list_objects_filter_result filter_trees_none( +struct seen_map_entry { + struct oidmap_entry base; + size_t depth; +}; + +/* Returns 1 if the oid was in the omits set before it was invoked. */ +static int filter_trees_update_omits( + struct object *obj, + struct filter_trees_depth_data *filter_data, + int include_it) +{ + if (!filter_data->omits) + return 0; + + if (include_it) + return oidset_remove(filter_data->omits, &obj->oid); + else + return oidset_insert(filter_data->omits, &obj->oid); +} + +static enum list_objects_filter_result filter_trees_depth( struct repository *r, enum list_objects_filter_situation filter_situation, struct object *obj, @@ -96,43 +130,91 @@ static enum list_objects_filter_result filter_trees_none( const char *filename, void *filter_data_) { - struct filter_trees_none_data *filter_data = filter_data_; + struct filter_trees_depth_data *filter_data = filter_data_; + struct seen_map_entry *seen_info; + int include_it = filter_data->current_depth < + filter_data->exclude_depth; + int filter_res; + int already_seen; + + /* + * Note that we do not use _MARK_SEEN in order to allow re-traversal in + * case we encounter a tree or blob again at a shallower depth. + */ switch (filter_situation) { default: BUG("unknown filter_situation: %d", filter_situation); - case LOFS_BEGIN_TREE: + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + filter_data->current_depth--; + return LOFR_ZERO; + case LOFS_BLOB: - if (filter_data->omits) { - oidset_insert(filter_data->omits, &obj->oid); - /* _MARK_SEEN but not _DO_SHOW (hard omit) */ - return LOFR_MARK_SEEN; + filter_trees_update_omits(obj, filter_data, include_it); + return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO; + + case LOFS_BEGIN_TREE: + seen_info = oidmap_get( + &filter_data->seen_at_depth, &obj->oid); + if (!seen_info) { + seen_info = xcalloc(1, sizeof(*seen_info)); + oidcpy(&seen_info->base.oid, &obj->oid); + seen_info->depth = filter_data->current_depth; + oidmap_put(&filter_data->seen_at_depth, seen_info); + already_seen = 0; } else { - /* - * Not collecting omits so no need to to traverse tree. - */ - return LOFR_SKIP_TREE | LOFR_MARK_SEEN; + already_seen = + filter_data->current_depth >= seen_info->depth; } - case LOFS_END_TREE: - assert(obj->type == OBJ_TREE); - return LOFR_ZERO; + if (already_seen) { + filter_res = LOFR_SKIP_TREE; + } else { + int been_omitted = filter_trees_update_omits( + obj, filter_data, include_it); + seen_info->depth = filter_data->current_depth; + + if (include_it) + filter_res = LOFR_DO_SHOW; + else if (filter_data->omits && !been_omitted) + /* + * Must update omit information of children + * recursively; they have not been omitted yet. + */ + filter_res = LOFR_ZERO; + else + filter_res = LOFR_SKIP_TREE; + } + filter_data->current_depth++; + return filter_res; } } -static void* filter_trees_none__init( +static void filter_trees_free(void *filter_data) { + struct filter_trees_depth_data *d = filter_data; + if (!d) + return; + oidmap_free(&d->seen_at_depth, 1); + free(d); +} + +static void *filter_trees_depth__init( struct oidset *omitted, struct list_objects_filter_options *filter_options, filter_object_fn *filter_fn, filter_free_fn *filter_free_fn) { - struct filter_trees_none_data *d = xcalloc(1, sizeof(*d)); + struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d)); d->omits = omitted; + oidmap_init(&d->seen_at_depth, 0); + d->exclude_depth = filter_options->tree_exclude_depth; + d->current_depth = 0; - *filter_fn = filter_trees_none; - *filter_free_fn = free; + *filter_fn = filter_trees_depth; + *filter_free_fn = filter_trees_free; return d; } @@ -430,7 +512,7 @@ static filter_init_fn s_filters[] = { NULL, filter_blobs_none__init, filter_blobs_limit__init, - filter_trees_none__init, + filter_trees_depth__init, filter_sparse_oid__init, filter_sparse_path__init, }; diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index eb32505a6e..9c11427719 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -283,7 +283,7 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' ' # Make sure tree:0 does not iterate through any trees. -test_expect_success 'filter a GIANT tree through tree:0' ' +test_expect_success 'verify skipping tree iteration when not collecting omits' ' GIT_TRACE=1 git -C r3 rev-list \ --objects --filter=tree:0 HEAD 2>filter_trace && grep "Skipping contents of tree [.][.][.]" filter_trace >actual && @@ -294,6 +294,126 @@ test_expect_success 'filter a GIANT tree through tree:0' ' ! grep "Skipping contents of tree [^.]" filter_trace ' +# Test tree:# filters. + +expect_has () { + commit=$1 && + name=$2 && + + hash=$(git -C r3 rev-parse $commit:$name) && + grep "^$hash $name$" actual +} + +test_expect_success 'verify tree:1 includes root trees' ' + git -C r3 rev-list --objects --filter=tree:1 HEAD >actual && + + # We should get two root directories and two commits. + expect_has HEAD "" && + expect_has HEAD~1 "" && + test_line_count = 4 actual +' + +test_expect_success 'verify tree:2 includes root trees and immediate children' ' + git -C r3 rev-list --objects --filter=tree:2 HEAD >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD dir1 && + expect_has HEAD pattern && + expect_has HEAD sparse1 && + expect_has HEAD sparse2 && + + # There are also 2 commit objects + test_line_count = 8 actual +' + +test_expect_success 'verify tree:3 includes everything expected' ' + git -C r3 rev-list --objects --filter=tree:3 HEAD >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD dir1 && + expect_has HEAD dir1/sparse1 && + expect_has HEAD dir1/sparse2 && + expect_has HEAD pattern && + expect_has HEAD sparse1 && + expect_has HEAD sparse2 && + + # There are also 2 commit objects + test_line_count = 10 actual +' + +# Test provisional omit collection logic with a repo that has objects appearing +# at multiple depths - first deeper than the filter's threshold, then shallow. + +test_expect_success 'setup r4' ' + git init r4 && + + echo foo > r4/foo && + mkdir r4/subdir && + echo bar > r4/subdir/bar && + + mkdir r4/filt && + cp -r r4/foo r4/subdir r4/filt && + + git -C r4 add foo subdir filt && + git -C r4 commit -m "commit msg" +' + +expect_has_with_different_name () { + repo=$1 && + name=$2 && + + hash=$(git -C $repo rev-parse HEAD:$name) && + ! grep "^$hash $name$" actual && + grep "^$hash " actual && + ! grep "~$hash" actual +} + +test_expect_success 'test tree:# filter provisional omit for blob and tree' ' + git -C r4 rev-list --objects --filter-print-omitted --filter=tree:2 \ + HEAD >actual && + expect_has_with_different_name r4 filt/foo && + expect_has_with_different_name r4 filt/subdir +' + +test_expect_success 'verify skipping tree iteration when collecting omits' ' + GIT_TRACE=1 git -C r4 rev-list --filter-print-omitted \ + --objects --filter=tree:0 HEAD 2>filter_trace && + grep "^Skipping contents of tree " filter_trace >actual && + + echo "Skipping contents of tree subdir/..." >expect && + test_cmp expect actual +' + +# Test tree:<depth> where a tree is iterated to twice - once where a subentry is +# too deep to be included, and again where the blob inside it is shallow enough +# to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we +# can't use it because a tree can be iterated over again at a lower depth). + +test_expect_success 'tree:<depth> where we iterate over tree at two levels' ' + git init r5 && + + mkdir -p r5/a/subdir/b && + echo foo > r5/a/subdir/b/foo && + + mkdir -p r5/subdir/b && + echo foo > r5/subdir/b/foo && + + git -C r5 add a subdir && + git -C r5 commit -m "commit msg" && + + git -C r5 rev-list --objects --filter=tree:4 HEAD >actual && + expect_has_with_different_name r5 a/subdir/b/foo +' + +test_expect_success 'tree:<depth> which filters out blob but given as arg' ' + blob_hash=$(git -C r4 rev-parse HEAD:subdir/bar) && + + git -C r4 rev-list --objects --filter=tree:1 HEAD $blob_hash >actual && + grep ^$blob_hash actual +' + # Delete some loose objects and use rev-list, but WITHOUT any filtering. # This models previously omitted objects that we did not receive. @@ -324,4 +444,21 @@ test_expect_success 'rev-list W/ missing=allow-any' ' git -C r1 rev-list --quiet --missing=allow-any --objects HEAD ' +# Test expansion of filter specs. + +test_expect_success 'expand blob limit in protocol' ' + git -C r2 config --local uploadpack.allowfilter 1 && + GIT_TRACE_PACKET="$(pwd)/trace" git -c protocol.version=2 clone \ + --filter=blob:limit=1k "file://$(pwd)/r2" limit && + ! grep "blob:limit=1k" trace && + grep "blob:limit=1024" trace +' + +test_expect_success 'expand tree depth limit in protocol' ' + GIT_TRACE_PACKET="$(pwd)/tree_trace" git -c protocol.version=2 clone \ + --filter=tree:0k "file://$(pwd)/r2" tree && + ! grep "tree:0k" tree_trace && + grep "tree:0" tree_trace +' + test_done diff --git a/transport-helper.c b/transport-helper.c index 6cf3bb324e..1f52c95fd8 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -679,10 +679,15 @@ static int fetch(struct transport *transport, if (data->transport_options.update_shallow) set_helper_option(transport, "update-shallow", "true"); - if (data->transport_options.filter_options.choice) - set_helper_option( - transport, "filter", - data->transport_options.filter_options.filter_spec); + if (data->transport_options.filter_options.choice) { + struct strbuf expanded_filter_spec = STRBUF_INIT; + expand_list_objects_filter_spec( + &data->transport_options.filter_options, + &expanded_filter_spec); + set_helper_option(transport, "filter", + expanded_filter_spec.buf); + strbuf_release(&expanded_filter_spec); + } if (data->transport_options.negotiation_tips) warning("Ignoring --negotiation-tip because the protocol does not support it."); diff --git a/upload-pack.c b/upload-pack.c index 9df27b55a0..ee89381585 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -139,14 +139,17 @@ static void create_pack_file(const struct object_array *have_obj, if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); if (filter_options.filter_spec) { + struct strbuf expanded_filter_spec = STRBUF_INIT; + expand_list_objects_filter_spec(&filter_options, + &expanded_filter_spec); if (pack_objects.use_shell) { struct strbuf buf = STRBUF_INIT; - sq_quote_buf(&buf, filter_options.filter_spec); + sq_quote_buf(&buf, expanded_filter_spec.buf); argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf); strbuf_release(&buf); } else { argv_array_pushf(&pack_objects.args, "--filter=%s", - filter_options.filter_spec); + expanded_filter_spec.buf); } } |