summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/git-fast-export.txt29
-rw-r--r--builtin/fast-export.c50
-rwxr-xr-xt/t9351-fast-export-anonymize.sh11
3 files changed, 88 insertions, 2 deletions
diff --git a/Documentation/git-fast-export.txt b/Documentation/git-fast-export.txt
index e8950de3ba..1978dbdc6a 100644
--- a/Documentation/git-fast-export.txt
+++ b/Documentation/git-fast-export.txt
@@ -119,6 +119,11 @@ by keeping the marks the same across runs.
the shape of the history and stored tree. See the section on
`ANONYMIZING` below.
+--anonymize-map=<from>[:<to>]::
+ Convert token `<from>` to `<to>` in the anonymized output. If
+ `<to>` is omitted, map `<from>` to itself (i.e., do not
+ anonymize it). See the section on `ANONYMIZING` below.
+
--reference-excluded-parents::
By default, running a command such as `git fast-export
master~5..master` will not include the commit master{tilde}5
@@ -238,6 +243,30 @@ collapse "User 0", "User 1", etc into "User X"). This produces a much
smaller output, and it is usually easy to quickly confirm that there is
no private data in the stream.
+Reproducing some bugs may require referencing particular commits or
+paths, which becomes challenging after refnames and paths have been
+anonymized. You can ask for a particular token to be left as-is or
+mapped to a new value. For example, if you have a bug which reproduces
+with `git rev-list sensitive -- secret.c`, you can run:
+
+---------------------------------------------------
+$ git fast-export --anonymize --all \
+ --anonymize-map=sensitive:foo \
+ --anonymize-map=secret.c:bar.c \
+ >stream
+---------------------------------------------------
+
+After importing the stream, you can then run `git rev-list foo -- bar.c`
+in the anonymized repository.
+
+Note that paths and refnames are split into tokens at slash boundaries.
+The command above would anonymize `subdir/secret.c` as something like
+`path123/bar.c`; you could then search for `bar.c` in the anonymized
+repository to determine the final pathname.
+
+To make referencing the final pathname simpler, you can map each path
+component; so if you also anonymize `subdir` to `publicdir`, then the
+final pathname would be `publicdir/bar.c`.
LIMITATIONS
-----------
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 1cbca5b4b4..b0b09bca30 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -45,6 +45,7 @@ static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
static struct refspec refspecs = REFSPEC_INIT_FETCH;
static int anonymize;
+static struct hashmap anonymized_seeds;
static struct revision_sources revision_sources;
static int parse_opt_signed_tag_mode(const struct option *opt,
@@ -168,8 +169,18 @@ static const char *anonymize_str(struct hashmap *map,
hashmap_entry_init(&key.hash, memhash(orig, len));
key.orig = orig;
key.orig_len = len;
- ret = hashmap_get_entry(map, &key, hash, &key);
+ /* First check if it's a token the user configured manually... */
+ if (anonymized_seeds.cmpfn)
+ ret = hashmap_get_entry(&anonymized_seeds, &key, hash, &key);
+ else
+ ret = NULL;
+
+ /* ...otherwise check if we've already seen it in this context... */
+ if (!ret)
+ ret = hashmap_get_entry(map, &key, hash, &key);
+
+ /* ...and finally generate a new mapping if necessary */
if (!ret) {
FLEX_ALLOC_MEM(ret, orig, orig, len);
hashmap_entry_init(&ret->hash, key.hash.hash);
@@ -1147,6 +1158,37 @@ static void handle_deletes(void)
}
}
+static char *anonymize_seed(void *data)
+{
+ return xstrdup(data);
+}
+
+static int parse_opt_anonymize_map(const struct option *opt,
+ const char *arg, int unset)
+{
+ struct hashmap *map = opt->value;
+ const char *delim, *value;
+ size_t keylen;
+
+ BUG_ON_OPT_NEG(unset);
+
+ delim = strchr(arg, ':');
+ if (delim) {
+ keylen = delim - arg;
+ value = delim + 1;
+ } else {
+ keylen = strlen(arg);
+ value = arg;
+ }
+
+ if (!keylen || !*value)
+ return error(_("--anonymize-map token cannot be empty"));
+
+ anonymize_str(map, anonymize_seed, arg, keylen, (void *)value);
+
+ return 0;
+}
+
int cmd_fast_export(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
@@ -1188,6 +1230,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
N_("Apply refspec to exported refs")),
OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
+ OPT_CALLBACK_F(0, "anonymize-map", &anonymized_seeds, N_("from:to"),
+ N_("convert <from> to <to> in anonymized output"),
+ PARSE_OPT_NONEG, parse_opt_anonymize_map),
OPT_BOOL(0, "reference-excluded-parents",
&reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")),
OPT_BOOL(0, "show-original-ids", &show_original_ids,
@@ -1215,6 +1260,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
if (argc > 1)
usage_with_options (fast_export_usage, options);
+ if (anonymized_seeds.cmpfn && !anonymize)
+ die(_("--anonymize-map without --anonymize does not make sense"));
+
if (refspecs_list.nr) {
int i;
diff --git a/t/t9351-fast-export-anonymize.sh b/t/t9351-fast-export-anonymize.sh
index dc5d75cd19..5a21c71568 100755
--- a/t/t9351-fast-export-anonymize.sh
+++ b/t/t9351-fast-export-anonymize.sh
@@ -6,6 +6,7 @@ test_description='basic tests for fast-export --anonymize'
test_expect_success 'setup simple repo' '
test_commit base &&
test_commit foo &&
+ test_commit retain-me &&
git checkout -b other HEAD^ &&
mkdir subdir &&
test_commit subdir/bar &&
@@ -18,7 +19,10 @@ test_expect_success 'setup simple repo' '
'
test_expect_success 'export anonymized stream' '
- git fast-export --anonymize --all >stream
+ git fast-export --anonymize --all \
+ --anonymize-map=retain-me \
+ --anonymize-map=xyzzy:custom-name \
+ >stream
'
# this also covers commit messages
@@ -30,6 +34,11 @@ test_expect_success 'stream omits path names' '
! grep xyzzy stream
'
+test_expect_success 'stream contains user-specified names' '
+ grep retain-me stream &&
+ grep custom-name stream
+'
+
test_expect_success 'stream omits gitlink oids' '
# avoid relying on the whole oid to remain hash-agnostic; this is
# plenty to be unique within our test case