From 525e7fba7854c23ee3530d0bf88d75f106f14c95 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 16 Sep 2019 20:44:31 +0200 Subject: path.c: document the purpose of `is_ntfs_dotgit()` Previously, this function was completely undocumented. It is worth, though, to explain what is going on, as it is not really obvious at all. Suggested-by: Garima Singh Signed-off-by: Johannes Schindelin --- path.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/path.c b/path.c index 9ac0531a29..22bd0b6f52 100644 --- a/path.c +++ b/path.c @@ -1302,6 +1302,34 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) return 1; } +/* + * On NTFS, we need to be careful to disallow certain synonyms of the `.git/` + * directory: + * + * - For historical reasons, file names that end in spaces or periods are + * automatically trimmed. Therefore, `.git . . ./` is a valid way to refer + * to `.git/`. + * + * - For other historical reasons, file names that do not conform to the 8.3 + * format (up to eight characters for the basename, three for the file + * extension, certain characters not allowed such as `+`, etc) are associated + * with a so-called "short name", at least on the `C:` drive by default. + * Which means that `git~1/` is a valid way to refer to `.git/`. + * + * Note: Technically, `.git/` could receive the short name `git~2` if the + * short name `git~1` were already used. In Git, however, we guarantee that + * `.git` is the first item in a directory, therefore it will be associated + * with the short name `git~1` (unless short names are disabled). + * + * When this function returns 1, it indicates that the specified file/directory + * name refers to a `.git` file or directory, or to any of these synonyms, and + * Git should therefore not track it. + * + * This function is intended to be used by `git fsck` even on platforms where + * the backslash is a regular filename character, therefore it needs to handle + * backlash characters in the provided `name` specially: they are interpreted + * as directory separators. + */ int is_ntfs_dotgit(const char *name) { size_t len; -- cgit v1.2.3 From a62f9d1ace8c6556cbc1bb7df69eff0a0bb9e774 Mon Sep 17 00:00:00 2001 From: Garima Singh Date: Wed, 4 Sep 2019 13:36:39 -0400 Subject: test-path-utils: offer to run a protectNTFS/protectHFS benchmark In preparation to flipping the default on `core.protectNTFS`, let's have some way to measure the speed impact of this config setting reliably (and for comparison, the `core.protectHFS` config setting). For now, this is a manual performance benchmark: ./t/helper/test-path-utils protect_ntfs_hfs [arguments...] where the arguments are an optional number of file names to test with, optionally followed by minimum and maximum length of the random file names. The default values are one million, 3 and 20, respectively. Just like `sqrti()` in `bisect.c`, we introduce a very simple function to approximation the square root of a given value, in order to avoid having to introduce the first user of `` in Git's source code. Note: this is _not_ implemented as a Unix shell script in t/perf/ because we really care about _very_ precise timings here, and Unix shell scripts are simply unsuited for precise and consistent benchmarking. Signed-off-by: Garima Singh Signed-off-by: Johannes Schindelin --- t/helper/test-path-utils.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 94846550f7..16d8e689c8 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -176,6 +176,99 @@ static int is_dotgitmodules(const char *path) return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path); } +/* + * A very simple, reproducible pseudo-random generator. Copied from + * `test-genrandom.c`. + */ +static uint64_t my_random_value = 1234; + +static uint64_t my_random(void) +{ + my_random_value = my_random_value * 1103515245 + 12345; + return my_random_value; +} + +/* + * A fast approximation of the square root, without requiring math.h. + * + * It uses Newton's method to approximate the solution of 0 = x^2 - value. + */ +static double my_sqrt(double value) +{ + const double epsilon = 1e-6; + double x = value; + + if (value == 0) + return 0; + + for (;;) { + double delta = (value / x - x) / 2; + if (delta < epsilon && delta > -epsilon) + return x + delta; + x += delta; + } +} + +static int protect_ntfs_hfs_benchmark(int argc, const char **argv) +{ + size_t i, j, nr, min_len = 3, max_len = 20; + char **names; + int repetitions = 15, file_mode = 0100644; + uint64_t begin, end; + double m[3][2], v[3][2]; + uint64_t cumul; + double cumul2; + + if (argc > 1 && !strcmp(argv[1], "--with-symlink-mode")) { + file_mode = 0120000; + argc--; + argv++; + } + + nr = argc > 1 ? strtoul(argv[1], NULL, 0) : 1000000; + ALLOC_ARRAY(names, nr); + + if (argc > 2) { + min_len = strtoul(argv[2], NULL, 0); + if (argc > 3) + max_len = strtoul(argv[3], NULL, 0); + if (min_len > max_len) + die("min_len > max_len"); + } + + for (i = 0; i < nr; i++) { + size_t len = min_len + (my_random() % (max_len + 1 - min_len)); + + names[i] = xmallocz(len); + while (len > 0) + names[i][--len] = (char)(' ' + (my_random() % ('\x7f' - ' '))); + } + + for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++) + for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) { + cumul = 0; + cumul2 = 0; + for (i = 0; i < repetitions; i++) { + begin = getnanotime(); + for (j = 0; j < nr; j++) + verify_path(names[j], file_mode); + end = getnanotime(); + printf("protect_ntfs = %d, protect_hfs = %d: %lfms\n", protect_ntfs, protect_hfs, (end-begin) / (double)1e6); + cumul += end - begin; + cumul2 += (end - begin) * (end - begin); + } + m[protect_ntfs][protect_hfs] = cumul / (double)repetitions; + v[protect_ntfs][protect_hfs] = my_sqrt(cumul2 / (double)repetitions - m[protect_ntfs][protect_hfs] * m[protect_ntfs][protect_hfs]); + printf("mean: %lfms, stddev: %lfms\n", m[protect_ntfs][protect_hfs] / (double)1e6, v[protect_ntfs][protect_hfs] / (double)1e6); + } + + for (protect_ntfs = 0; protect_ntfs < 2; protect_ntfs++) + for (protect_hfs = 0; protect_hfs < 2; protect_hfs++) + printf("ntfs=%d/hfs=%d: %lf%% slower\n", protect_ntfs, protect_hfs, (m[protect_ntfs][protect_hfs] - m[0][0]) * 100 / m[0][0]); + + return 0; +} + int cmd_main(int argc, const char **argv) { if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { @@ -290,6 +383,9 @@ int cmd_main(int argc, const char **argv) return !!res; } + if (argc > 1 && !strcmp(argv[1], "protect_ntfs_hfs")) + return !!protect_ntfs_hfs_benchmark(argc - 1, argv + 1); + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; -- cgit v1.2.3 From 288a74bcd28229a00c3632f18cba92dbfdf73ee9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 23 Sep 2019 08:58:11 +0200 Subject: is_ntfs_dotgit(): only verify the leading segment The config setting `core.protectNTFS` is specifically designed to work not only on Windows, but anywhere, to allow for repositories hosted on, say, Linux servers to be protected against NTFS-specific attack vectors. As a consequence, `is_ntfs_dotgit()` manually splits backslash-separated paths (but does not do the same for paths separated by forward slashes), under the assumption that the backslash might not be a valid directory separator on the _current_ Operating System. However, the two callers, `verify_path()` and `fsck_tree()`, are supposed to feed only individual path segments to the `is_ntfs_dotgit()` function. This causes a lot of duplicate scanning (and very inefficient scanning, too, as the inner loop of `is_ntfs_dotgit()` was optimized for readability rather than for speed. Let's simplify the design of `is_ntfs_dotgit()` by putting the burden of splitting the paths by backslashes as directory separators on the callers of said function. Consequently, the `verify_path()` function, which already splits the path by directory separators, now treats backslashes as directory separators _explicitly_ when `core.protectNTFS` is turned on, even on platforms where the backslash is _not_ a directory separator. Note that we have to repeat some code in `verify_path()`: if the backslash is not a directory separator on the current Operating System, we want to allow file names like `\`, but we _do_ want to disallow paths that are clearly intended to cause harm when the repository is cloned on Windows. The `fsck_tree()` function (the other caller of `is_ntfs_dotgit()`) now needs to look for backslashes in tree entries' names specifically when `core.protectNTFS` is turned on. While it would be tempting to completely disallow backslashes in that case (much like `fsck` reports names containing forward slashes as "full paths"), this would be overzealous: when `core.protectNTFS` is turned on in a non-Windows setup, backslashes are perfectly valid characters in file names while we _still_ want to disallow tree entries that are clearly designed to exploit NTFS-specific behavior. This simplification will make subsequent changes easier to implement, such as turning `core.protectNTFS` on by default (not only on Windows) or protecting against attack vectors involving NTFS Alternate Data Streams. Incidentally, this change allows for catching malicious repositories that contain tree entries of the form `dir\.gitmodules` already on the server side rather than only on the client side (and previously only on Windows): in contrast to `is_ntfs_dotgit()`, the `is_ntfs_dotgitmodules()` function already expects the caller to split the paths by directory separators. Signed-off-by: Johannes Schindelin --- fsck.c | 11 ++++++++++- path.c | 5 +---- read-cache.c | 8 ++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/fsck.c b/fsck.c index b1579c7e28..d80a96f4be 100644 --- a/fsck.c +++ b/fsck.c @@ -551,7 +551,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) while (desc.size) { unsigned mode; - const char *name; + const char *name, *backslash; const struct object_id *oid; oid = tree_entry_extract(&desc, &name, &mode); @@ -565,6 +565,15 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) is_hfs_dotgit(name) || is_ntfs_dotgit(name)); has_zero_pad |= *(char *)desc.buffer == '0'; + + if ((backslash = strchr(name, '\\'))) { + while (backslash) { + backslash++; + has_dotgit |= is_ntfs_dotgit(backslash); + backslash = strchr(backslash, '\\'); + } + } + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; diff --git a/path.c b/path.c index 22bd0b6f52..f62a37d5f5 100644 --- a/path.c +++ b/path.c @@ -1342,10 +1342,7 @@ int is_ntfs_dotgit(const char *name) if (only_spaces_and_periods(name, len, 5) && !strncasecmp(name, "git~1", 5)) return 1; - if (name[len] != '\\') - return 0; - name += len + 1; - len = -1; + return 0; } } diff --git a/read-cache.c b/read-cache.c index 5b57b369e8..bde1e70c51 100644 --- a/read-cache.c +++ b/read-cache.c @@ -874,7 +874,15 @@ inside: if ((c == '.' && !verify_dotfile(path, mode)) || is_dir_sep(c) || c == '\0') return 0; + } else if (c == '\\' && protect_ntfs) { + if (is_ntfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_ntfs_dotgitmodules(path)) + return 0; + } } + c = *path++; } } -- cgit v1.2.3 From 7c3745fc6185495d5765628b4dfe1bd2c25a2981 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 28 Aug 2019 12:22:17 +0200 Subject: path: safeguard `.git` against NTFS Alternate Streams Accesses Probably inspired by HFS' resource streams, NTFS supports "Alternate Data Streams": by appending `:` to the file name, information in addition to the file contents can be written and read, information that is copied together with the file (unless copied to a non-NTFS location). These Alternate Data Streams are typically used for things like marking an executable as having just been downloaded from the internet (and hence not necessarily being trustworthy). In addition to a stream name, a stream type can be appended, like so: `::`. Unless specified, the default stream type is `$DATA` for files and `$INDEX_ALLOCATION` for directories. In other words, `.git::$INDEX_ALLOCATION` is a valid way to reference the `.git` directory! In our work in Git v2.2.1 to protect Git on NTFS drives under `core.protectNTFS`, we focused exclusively on NTFS short names, unaware of the fact that NTFS Alternate Data Streams offer a similar attack vector. Let's fix this. Seeing as it is better to be safe than sorry, we simply disallow paths referring to *any* NTFS Alternate Data Stream of `.git`, not just `::$INDEX_ALLOCATION`. This also simplifies the implementation. This closes CVE-2019-1352. Further reading about NTFS Alternate Data Streams: https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/c54dec26-1551-4d3a-a0ea-4fa40f848eb3 Reported-by: Nicolas Joly Signed-off-by: Johannes Schindelin --- path.c | 12 +++++++++++- t/t1014-read-tree-confusing.sh | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/path.c b/path.c index f62a37d5f5..e39ecf4689 100644 --- a/path.c +++ b/path.c @@ -1321,10 +1321,19 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) * `.git` is the first item in a directory, therefore it will be associated * with the short name `git~1` (unless short names are disabled). * + * - For yet other historical reasons, NTFS supports so-called "Alternate Data + * Streams", i.e. metadata associated with a given file, referred to via + * `::`. There exists a default stream + * type for directories, allowing `.git/` to be accessed via + * `.git::$INDEX_ALLOCATION/`. + * * When this function returns 1, it indicates that the specified file/directory * name refers to a `.git` file or directory, or to any of these synonyms, and * Git should therefore not track it. * + * For performance reasons, _all_ Alternate Data Streams of `.git/` are + * forbidden, not just `::$INDEX_ALLOCATION`. + * * This function is intended to be used by `git fsck` even on platforms where * the backslash is a regular filename character, therefore it needs to handle * backlash characters in the provided `name` specially: they are interpreted @@ -1335,7 +1344,8 @@ int is_ntfs_dotgit(const char *name) size_t len; for (len = 0; ; len++) - if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { + if (!name[len] || name[len] == '\\' || is_dir_sep(name[len]) || + name[len] == ':') { if (only_spaces_and_periods(name, len, 4) && !strncasecmp(name, ".git", 4)) return 1; diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index 2f5a25d503..da3376b3bb 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -49,6 +49,7 @@ git~1 .git.SPACE .git.{space} .\\\\.GIT\\\\foobar backslashes .git\\\\foobar backslashes2 +.git...:alternate-stream EOF test_expect_success 'utf-8 paths allowed with core.protectHFS off' ' -- cgit v1.2.3 From 91bd46588e6959e6903e275f78b10bd07830d547 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 28 Aug 2019 12:22:17 +0200 Subject: path: also guard `.gitmodules` against NTFS Alternate Data Streams We just safe-guarded `.git` against NTFS Alternate Data Stream-related attack vectors, and now it is time to do the same for `.gitmodules`. Note: In the added regression test, we refrain from verifying all kinds of variations between short names and NTFS Alternate Data Streams: as the new code disallows _all_ Alternate Data Streams of `.gitmodules`, it is enough to test one in order to know that all of them are guarded against. Signed-off-by: Johannes Schindelin --- path.c | 2 +- t/t0060-path-utils.sh | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/path.c b/path.c index e39ecf4689..2037e2d8c1 100644 --- a/path.c +++ b/path.c @@ -1369,7 +1369,7 @@ static int is_ntfs_dot_generic(const char *name, only_spaces_and_periods: for (;;) { char c = name[i++]; - if (!c) + if (!c || c == ':') return 1; if (c != ' ' && c != '.') return 0; diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 3f3357ed9f..2b8589e921 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -408,6 +408,9 @@ test_expect_success 'match .gitmodules' ' ~1000000 \ ~9999999 \ \ + .gitmodules:\$DATA \ + "gitmod~4 . :\$DATA" \ + \ --not \ ".gitmodules x" \ ".gitmodules .x" \ @@ -432,7 +435,9 @@ test_expect_success 'match .gitmodules' ' \ GI7EB~1 \ GI7EB~01 \ - GI7EB~1X + GI7EB~1X \ + \ + .gitmodules,:\$DATA ' test_done -- cgit v1.2.3