From a62f9d1ace8c6556cbc1bb7df69eff0a0bb9e774 Mon Sep 17 00:00:00 2001 From: Garima Singh Date: Wed, 4 Sep 2019 13:36:39 -0400 Subject: test-path-utils: offer to run a protectNTFS/protectHFS benchmark In preparation to flipping the default on `core.protectNTFS`, let's have some way to measure the speed impact of this config setting reliably (and for comparison, the `core.protectHFS` config setting). For now, this is a manual performance benchmark: ./t/helper/test-path-utils protect_ntfs_hfs [arguments...] where the arguments are an optional number of file names to test with, optionally followed by minimum and maximum length of the random file names. The default values are one million, 3 and 20, respectively. Just like `sqrti()` in `bisect.c`, we introduce a very simple function to approximation the square root of a given value, in order to avoid having to introduce the first user of `` in Git's source code. Note: this is _not_ implemented as a Unix shell script in t/perf/ because we really care about _very_ precise timings here, and Unix shell scripts are simply unsuited for precise and consistent benchmarking. Signed-off-by: Garima Singh Signed-off-by: Johannes Schindelin --- t/helper/test-path-utils.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) (limited to 't/helper') diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 94846550f7..16d8e689c8 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -176,6 +176,99 @@ static int is_dotgitmodules(const char *path) return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path); } +/* + * A very simple, reproducible pseudo-random generator. Copied from + * `test-genrandom.c`. + */ +static uint64_t my_random_value = 1234; + +static uint64_t my_random(void) +{ + my_random_value = my_random_value * 1103515245 + 12345; + return my_random_value; +} + +/* + * A fast approximation of the square root, without requiring math.h. + * + * It uses Newton's method to approximate the solution of 0 = x^2 - value. + */ +static double my_sqrt(double value) +{ + const double epsilon = 1e-6; + double x = value; + + if (value == 0) + return 0; + + for (;;) { + double delta = (value / x - x) / 2; + if (delta < epsilon && delta > -epsilon) + return x + delta; + x += delta; + } +} + +static int protect_ntfs_hfs_benchmark(int argc, const char **argv) +{ + size_t i, j, nr, min_len = 3, max_len = 20; + char **names; + int repetitions = 15, file_mode = 0100644; + uint64_t begin, end; + double m[3][2], v[3][2]; + uint64_t cumul; + double cumul2; + + if (argc > 1 && !strcmp(argv[1], "--with-symlink-mode")) { + file_mode = 0120000; + argc--; + argv++; + } + + nr = argc > 1 ? strtoul(argv[1], NULL, 0) : 1000000; + ALLOC_ARRAY(names, nr); + + if (argc > 2) { + min_len = strtoul(argv[2], NULL, 0); + if (argc > 3) + max_len = strtoul(argv[3], NULL, 0); + if (min_len > max_len) + die("min_len > max_len"); + } + + for (i = 0; i < nr; i++) { + size_t len = min_len + (my_random() % (max_len + 1 - min_len)); + + names[i] = xmallocz(len); + while (len > 0) + names[i][--len] = (char)(' ' + (my_random() % ('\x7f' - ' '))); + } + + for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++) + for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) { + cumul = 0; + cumul2 = 0; + for (i = 0; i < repetitions; i++) { + begin = getnanotime(); + for (j = 0; j < nr; j++) + verify_path(names[j], file_mode); + end = getnanotime(); + printf("protect_ntfs = %d, protect_hfs = %d: %lfms\n", protect_ntfs, protect_hfs, (end-begin) / (double)1e6); + cumul += end - begin; + cumul2 += (end - begin) * (end - begin); + } + m[protect_ntfs][protect_hfs] = cumul / (double)repetitions; + v[protect_ntfs][protect_hfs] = my_sqrt(cumul2 / (double)repetitions - m[protect_ntfs][protect_hfs] * m[protect_ntfs][protect_hfs]); + printf("mean: %lfms, stddev: %lfms\n", m[protect_ntfs][protect_hfs] / (double)1e6, v[protect_ntfs][protect_hfs] / (double)1e6); + } + + for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++) + for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) + printf("ntfs=%d/hfs=%d: %lf%% slower\n", protect_ntfs, protect_hfs, (m[protect_ntfs][protect_hfs] - m[0][0]) * 100 / m[0][0]); + + return 0; +} + int cmd_main(int argc, const char **argv) { if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { @@ -290,6 +383,9 @@ int cmd_main(int argc, const char **argv) return !!res; } + if (argc > 1 && !strcmp(argv[1], "protect_ntfs_hfs")) + return !!protect_ntfs_hfs_benchmark(argc - 1, argv + 1); + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; -- cgit v1.2.3 From ad1559252945179e28fba7d693494051352810c5 Mon Sep 17 00:00:00 2001 From: Garima Singh Date: Wed, 18 Sep 2019 16:03:59 -0400 Subject: tests: add a helper to stress test argument quoting On Windows, we have to do all the command-line argument quoting ourselves. Worse: we have to have two versions of said quoting, one for MSYS2 programs (which have their own dequoting rules) and the rest. We care mostly about the rest, and to make sure that that works, let's have a stress test that comes up with all kinds of awkward arguments, verifying that a spawned sub-process receives those unharmed. Signed-off-by: Garima Singh Signed-off-by: Johannes Schindelin --- t/helper/test-run-command.c | 118 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 2 deletions(-) (limited to 't/helper') diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c index d24d157379..6c801cb529 100644 --- a/t/helper/test-run-command.c +++ b/t/helper/test-run-command.c @@ -12,8 +12,8 @@ #include "run-command.h" #include "argv-array.h" #include "strbuf.h" -#include -#include +#include "gettext.h" +#include "parse-options.h" static int number_callbacks; static int parallel_next(struct child_process *cp, @@ -49,11 +49,125 @@ static int task_finished(int result, return 1; } +static uint64_t my_random_next = 1234; + +static uint64_t my_random(void) +{ + uint64_t res = my_random_next; + my_random_next = my_random_next * 1103515245 + 12345; + return res; +} + +static int quote_stress_test(int argc, const char **argv) +{ + /* + * We are running a quote-stress test. + * spawn a subprocess that runs quote-stress with a + * special option that echoes back the arguments that + * were passed in. + */ + char special[] = ".?*\\^_\"'`{}()[]<>@~&+:;$%"; // \t\r\n\a"; + int i, j, k, trials = 100; + struct strbuf out = STRBUF_INIT; + struct argv_array args = ARGV_ARRAY_INIT; + struct option options[] = { + OPT_INTEGER('n', "trials", &trials, "Number of trials"), + OPT_END() + }; + const char * const usage[] = { + "test-run-command quote-stress-test ", + NULL + }; + + argc = parse_options(argc, argv, NULL, options, usage, 0); + + for (i = 0; i < trials; i++) { + struct child_process cp = CHILD_PROCESS_INIT; + size_t arg_count = 1 + (my_random() % 5), arg_offset; + int ret = 0; + + argv_array_clear(&args); + argv_array_pushl(&args, "test-run-command", + "quote-echo", NULL); + arg_offset = args.argc; + for (j = 0; j < arg_count; j++) { + char buf[20]; + size_t min_len = 1; + size_t arg_len = min_len + + (my_random() % (ARRAY_SIZE(buf) - min_len)); + + for (k = 0; k < arg_len; k++) + buf[k] = special[my_random() % + ARRAY_SIZE(special)]; + buf[arg_len] = '\0'; + + argv_array_push(&args, buf); + } + + cp.argv = args.argv; + strbuf_reset(&out); + if (pipe_command(&cp, NULL, 0, &out, 0, NULL, 0) < 0) + return error("Failed to spawn child process"); + + for (j = 0, k = 0; j < arg_count; j++) { + const char *arg = args.argv[j + arg_offset]; + + if (strcmp(arg, out.buf + k)) + ret = error("incorrectly quoted arg: '%s', " + "echoed back as '%s'", + arg, out.buf + k); + k += strlen(out.buf + k) + 1; + } + + if (k != out.len) + ret = error("got %d bytes, but consumed only %d", + (int)out.len, (int)k); + + if (ret) { + fprintf(stderr, "Trial #%d failed. Arguments:\n", i); + for (j = 0; j < arg_count; j++) + fprintf(stderr, "arg #%d: '%s'\n", + (int)j, args.argv[j + arg_offset]); + + strbuf_release(&out); + argv_array_clear(&args); + + return ret; + } + + if (i && (i % 100) == 0) + fprintf(stderr, "Trials completed: %d\n", (int)i); + } + + strbuf_release(&out); + argv_array_clear(&args); + + return 0; +} + +static int quote_echo(int argc, const char **argv) +{ + while (argc > 1) { + fwrite(argv[1], strlen(argv[1]), 1, stdout); + fputc('\0', stdout); + argv++; + argc--; + } + + return 0; +} + int cmd_main(int argc, const char **argv) { struct child_process proc = CHILD_PROCESS_INIT; int jobs; + if (argc >= 2 && !strcmp(argv[1], "quote-stress-test")) + return !!quote_stress_test(argc - 1, argv + 1); + + if (argc >= 2 && !strcmp(argv[1], "quote-echo")) + return !!quote_echo(argc - 1, argv + 1); + if (argc < 3) return 1; proc.argv = (const char **)argv + 2; -- cgit v1.2.3 From 55953c77c0bfcb727ffd7e293e4661b7a24b791b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 20 Sep 2019 19:09:39 +0200 Subject: quote-stress-test: accept arguments to test via the command-line When the stress test reported a problem with quoting certain arguments, it is helpful to have a facility to play with those arguments in order to find out whether variations of those arguments are affected, too. Let's allow `test-run-command quote-stress-test -- ` to be used for that purpose. Signed-off-by: Johannes Schindelin --- t/helper/test-run-command.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 't/helper') diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c index 6c801cb529..bdbc5ec56a 100644 --- a/t/helper/test-run-command.c +++ b/t/helper/test-run-command.c @@ -83,25 +83,34 @@ static int quote_stress_test(int argc, const char **argv) for (i = 0; i < trials; i++) { struct child_process cp = CHILD_PROCESS_INIT; - size_t arg_count = 1 + (my_random() % 5), arg_offset; + size_t arg_count, arg_offset; int ret = 0; argv_array_clear(&args); argv_array_pushl(&args, "test-run-command", "quote-echo", NULL); arg_offset = args.argc; - for (j = 0; j < arg_count; j++) { - char buf[20]; - size_t min_len = 1; - size_t arg_len = min_len + - (my_random() % (ARRAY_SIZE(buf) - min_len)); - - for (k = 0; k < arg_len; k++) - buf[k] = special[my_random() % - ARRAY_SIZE(special)]; - buf[arg_len] = '\0'; - - argv_array_push(&args, buf); + + if (argc > 0) { + trials = 1; + arg_count = argc; + for (j = 0; j < arg_count; j++) + argv_array_push(&args, argv[j]); + } else { + arg_count = 1 + (my_random() % 5); + for (j = 0; j < arg_count; j++) { + char buf[20]; + size_t min_len = 1; + size_t arg_len = min_len + + (my_random() % (ARRAY_SIZE(buf) - min_len)); + + for (k = 0; k < arg_len; k++) + buf[k] = special[my_random() % + ARRAY_SIZE(special)]; + buf[arg_len] = '\0'; + + argv_array_push(&args, buf); + } } cp.argv = args.argv; -- cgit v1.2.3 From 7530a6287e20a74b9fe6d4ca3a66df0f0f5cc52c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 19 Sep 2019 23:46:31 +0200 Subject: quote-stress-test: allow skipping some trials When the, say, 93rd trial run fails, it is a good idea to have a way to skip the first 92 trials and dig directly into the 93rd in a debugger. Signed-off-by: Johannes Schindelin --- t/helper/test-run-command.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 't/helper') diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c index bdbc5ec56a..07989f78ec 100644 --- a/t/helper/test-run-command.c +++ b/t/helper/test-run-command.c @@ -67,11 +67,12 @@ static int quote_stress_test(int argc, const char **argv) * were passed in. */ char special[] = ".?*\\^_\"'`{}()[]<>@~&+:;$%"; // \t\r\n\a"; - int i, j, k, trials = 100; + int i, j, k, trials = 100, skip = 0; struct strbuf out = STRBUF_INIT; struct argv_array args = ARGV_ARRAY_INIT; struct option options[] = { OPT_INTEGER('n', "trials", &trials, "Number of trials"), + OPT_INTEGER('s', "skip", &skip, "Skip trials"), OPT_END() }; const char * const usage[] = { @@ -113,6 +114,9 @@ static int quote_stress_test(int argc, const char **argv) } } + if (i < skip) + continue; + cp.argv = args.argv; strbuf_reset(&out); if (pipe_command(&cp, NULL, 0, &out, 0, NULL, 0) < 0) -- cgit v1.2.3 From 379e51d1ae668a1f26d50eb59b3f8befc1eb8883 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 20 Sep 2019 00:12:37 +0200 Subject: quote-stress-test: offer to test quoting arguments for MSYS2 sh It is unfortunate that we need to quote arguments differently on Windows, depending whether we build a command-line for MSYS2's `sh` or for other Windows executables. We already have a test helper to verify the latter, with this patch we can also verify the former. Signed-off-by: Johannes Schindelin --- t/helper/test-run-command.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 't/helper') diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c index 07989f78ec..b622334407 100644 --- a/t/helper/test-run-command.c +++ b/t/helper/test-run-command.c @@ -67,12 +67,13 @@ static int quote_stress_test(int argc, const char **argv) * were passed in. */ char special[] = ".?*\\^_\"'`{}()[]<>@~&+:;$%"; // \t\r\n\a"; - int i, j, k, trials = 100, skip = 0; + int i, j, k, trials = 100, skip = 0, msys2 = 0; struct strbuf out = STRBUF_INIT; struct argv_array args = ARGV_ARRAY_INIT; struct option options[] = { OPT_INTEGER('n', "trials", &trials, "Number of trials"), OPT_INTEGER('s', "skip", &skip, "Skip trials"), + OPT_BOOL('m', "msys2", &msys2, "Test quoting for MSYS2's sh"), OPT_END() }; const char * const usage[] = { @@ -82,14 +83,20 @@ static int quote_stress_test(int argc, const char **argv) argc = parse_options(argc, argv, NULL, options, usage, 0); + setenv("MSYS_NO_PATHCONV", "1", 0); + for (i = 0; i < trials; i++) { struct child_process cp = CHILD_PROCESS_INIT; size_t arg_count, arg_offset; int ret = 0; argv_array_clear(&args); - argv_array_pushl(&args, "test-run-command", - "quote-echo", NULL); + if (msys2) + argv_array_pushl(&args, "sh", "-c", + "printf %s\\\\0 \"$@\"", "skip", NULL); + else + argv_array_pushl(&args, "test-run-command", + "quote-echo", NULL); arg_offset = args.argc; if (argc > 0) { -- cgit v1.2.3 From d2c84dad1c88f40906799bc879f70b965efd8ba6 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Sep 2019 13:27:53 +0200 Subject: mingw: refuse to access paths with trailing spaces or periods When creating a directory on Windows whose path ends in a space or a period (or chains thereof), the Win32 API "helpfully" trims those. For example, `mkdir("abc ");` will return success, but actually create a directory called `abc` instead. This stems back to the DOS days, when all file names had exactly 8 characters plus exactly 3 characters for the file extension, and the only way to have shorter names was by padding with spaces. Sadly, this "helpful" behavior is a bit inconsistent: after a successful `mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because the directory `abc ` does not actually exist). Even if it would work, we now have a serious problem because a Git repository could contain directories `abc` and `abc `, and on Windows, they would be "merged" unintentionally. As these paths are illegal on Windows, anyway, let's disallow any accesses to such paths on that Operating System. For practical reasons, this behavior is still guarded by the config setting `core.protectNTFS`: it is possible (and at least two regression tests make use of it) to create commits without involving the worktree. In such a scenario, it is of course possible -- even on Windows -- to create such file names. Among other consequences, this patch disallows submodules' paths to end in spaces on Windows (which would formerly have confused Git enough to try to write into incorrect paths, anyway). While this patch does not fix a vulnerability on its own, it prevents an attack vector that was exploited in demonstrations of a number of recently-fixed security bugs. The regression test added to `t/t7417-submodule-path-url.sh` reflects that attack vector. Note that we have to adjust the test case "prevent git~1 squatting on Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue. It tries to clone two submodules whose names differ only in a trailing period character, and as a consequence their git directories differ in the same way. Previously, when Git tried to clone the second submodule, it thought that the git directory already existed (because on Windows, when you create a directory with the name `b.` it actually creates `b`), but with this patch, the first submodule's clone will fail because of the illegal name of the git directory. Therefore, when cloning the second submodule, Git will take a different code path: a fresh clone (without an existing git directory). Both code paths fail to clone the second submodule, both because the the corresponding worktree directory exists and is not empty, but the error messages are worded differently. Signed-off-by: Johannes Schindelin --- t/helper/test-path-utils.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 't/helper') diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 16d8e689c8..8b3ce07860 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -386,6 +386,23 @@ int cmd_main(int argc, const char **argv) if (argc > 1 && !strcmp(argv[1], "protect_ntfs_hfs")) return !!protect_ntfs_hfs_benchmark(argc - 1, argv + 1); + if (argc > 1 && !strcmp(argv[1], "is_valid_path")) { + int res = 0, expect = 1, i; + + for (i = 2; i < argc; i++) + if (!strcmp("--not", argv[i])) + expect = 0; + else if (expect != is_valid_path(argv[i])) + res = error("'%s' is%s a valid path", + argv[i], expect ? " not" : ""); + else + fprintf(stderr, + "'%s' is%s a valid path\n", + argv[i], expect ? "" : " not"); + + return !!res; + } + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; -- cgit v1.2.3