From 4616918013bf4fb3ce61175702d963a1fdd87f84 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Nov 2014 13:36:51 -0500 Subject: unpack-trees: propagate errors adding entries to the index When unpack_trees tries to write an entry to the index, add_index_entry may report an error to stderr, but we ignore its return value. This leads to us returning a successful exit code for an operation that partially failed. Let's make sure to propagate this code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- unpack-trees.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/unpack-trees.c b/unpack-trees.c index 35cb05e92b..cf8996e0ea 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -102,7 +102,7 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts, opts->unpack_rejects[i].strdup_strings = 1; } -static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce, +static int do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce, unsigned int set, unsigned int clear) { clear |= CE_HASHED | CE_UNHASHED; @@ -112,8 +112,8 @@ static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce, ce->next = NULL; ce->ce_flags = (ce->ce_flags & ~clear) | set; - add_index_entry(&o->result, ce, - ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE); + return add_index_entry(&o->result, ce, + ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE); } static struct cache_entry *dup_entry(const struct cache_entry *ce) @@ -608,7 +608,9 @@ static int unpack_nondirectories(int n, unsigned long mask, for (i = 0; i < n; i++) if (src[i] && src[i] != o->df_conflict_entry) - do_add_entry(o, src[i], 0, 0); + if (do_add_entry(o, src[i], 0, 0)) + return -1; + return 0; } -- cgit v1.2.3 From 96b50cc19003d54f5962d65597c94e2c52eb22e7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Nov 2014 13:37:56 -0500 Subject: read-tree: add tests for confusing paths like ".." 
and ".git" We should prevent nonsense paths from entering the index in the first place, as they can cause confusing results if they are ever checked out into the working tree. We already do so, but we never tested it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1014-read-tree-confusing.sh | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 t/t1014-read-tree-confusing.sh diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh new file mode 100755 index 0000000000..7b31d53196 --- /dev/null +++ b/t/t1014-read-tree-confusing.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='check that read-tree rejects confusing paths' +. ./test-lib.sh + +test_expect_success 'create base tree' ' + echo content >file && + git add file && + git commit -m base && + blob=$(git rev-parse HEAD:file) && + tree=$(git rev-parse HEAD^{tree}) +' + +while read path; do + test_expect_success "reject $path at end of path" ' + printf "100644 blob %s\t%s" "$blob" "$path" >tree && + bogus=$(git mktree tree && + bogus=$(git mktree Date: Mon, 24 Nov 2014 13:39:12 -0500 Subject: verify_dotfile(): reject .git case-insensitively We do not allow ".git" to enter into the index as a path component, because checking out the result to the working tree may cause confusion for subsequent git commands. However, on case-insensitive file systems, ".Git" or ".GIT" is the same. We should catch and prevent those, too. Note that technically we could allow this for repos on case-sensitive filesystems. But there's not much point. It's unlikely that anybody cares, and it creates a repository that is unexpectedly non-portable to other systems. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- read-cache.c | 5 +++-- t/t1014-read-tree-confusing.sh | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/read-cache.c b/read-cache.c index 33dd676ccb..122be494f3 100644 --- a/read-cache.c +++ b/read-cache.c @@ -759,9 +759,10 @@ static int verify_dotfile(const char *rest) * shares the path end test with the ".." case. */ case 'g': - if (rest[1] != 'i') + case 'G': + if (rest[1] != 'i' && rest[1] != 'I') break; - if (rest[2] != 't') + if (rest[2] != 't' && rest[2] != 'T') break; rest += 2; /* fallthrough */ diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index 7b31d53196..eff8aedf7a 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -27,6 +27,7 @@ done <<-\EOF . .. .git +.GIT EOF test_done -- cgit v1.2.3 From 450870cba7a9bac94b5527021800bd8bf037c99c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Nov 2014 13:40:11 -0500 Subject: t1450: refactor ".", "..", and ".git" fsck tests We check that fsck notices and complains about confusing paths in trees. However, there are a few shortcomings: 1. We check only for these paths as file entries, not as intermediate paths (so ".git" and not ".git/foo"). 2. We check "." and ".." together, so it is possible that we notice only one and not the other. 3. We repeat a lot of boilerplate. Let's use some loops to be more thorough in our testing, and still end up with shorter code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 57 +++++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index d730734fde..4d8a4fe3c7 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -237,35 +237,32 @@ test_expect_success 'fsck notices submodule entry pointing to null sha1' ' ) ' -test_expect_success 'fsck notices "." and ".." 
in trees' ' - ( - git init dots && - cd dots && - blob=$(echo foo | git hash-object -w --stdin) && - tab=$(printf "\\t") && - git mktree <<-EOF && - 100644 blob $blob$tab. - 100644 blob $blob$tab.. - EOF - git fsck 2>out && - cat out && - grep "warning.*\\." out - ) -' - -test_expect_success 'fsck notices ".git" in trees' ' - ( - git init dotgit && - cd dotgit && - blob=$(echo foo | git hash-object -w --stdin) && - tab=$(printf "\\t") && - git mktree <<-EOF && - 100644 blob $blob$tab.git - EOF - git fsck 2>out && - cat out && - grep "warning.*\\.git" out - ) -' +while read name path; do + while read mode type; do + test_expect_success "fsck notices $path as $type" ' + ( + git init $name-$type && + cd $name-$type && + echo content >file && + git add file && + git commit -m base && + blob=$(git rev-parse :file) && + tree=$(git rev-parse HEAD^{tree}) && + value=$(eval "echo \$$type") && + printf "$mode $type %s\t%s" "$value" "$path" >bad && + git mktree out && + cat out && + grep "warning.*\\." out + )' + done <<-\EOF + 100644 blob + 040000 tree + EOF +done <<-\EOF +dot . +dotdot .. +dotgit .git +EOF test_done -- cgit v1.2.3 From 76e86fc6e3523d28e8db00e7b10c33c553d996b8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Nov 2014 13:40:44 -0500 Subject: fsck: notice .git case-insensitively We complain about ".git" in a tree because it cannot be loaded into the index or checked out. Since we now also reject ".GIT" case-insensitively, fsck should notice the same, so that errors do not propagate. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 2 +- t/t1450-fsck.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 99c0497674..918bf9a318 100644 --- a/fsck.c +++ b/fsck.c @@ -175,7 +175,7 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) has_dot = 1; if (!strcmp(name, "..")) has_dotdot = 1; - if (!strcmp(name, ".git")) + if (!strcasecmp(name, ".git")) has_dotgit = 1; has_zero_pad |= *(char *)desc.buffer == '0'; update_tree_entry(&desc); diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 4d8a4fe3c7..0438712553 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -263,6 +263,7 @@ done <<-\EOF dot . dotdot .. dotgit .git +dotgit-case .GIT EOF test_done -- cgit v1.2.3 From 6162a1d323d24fd8cbbb1a6145a91fb849b2568f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 15 Dec 2014 17:56:59 -0500 Subject: utf8: add is_hfs_dotgit() helper We do not allow paths with a ".git" component to be added to the index, as that would mean repository contents could overwrite our repository files. However, asking "is this path the same as .git" is not as simple as strcmp() on some filesystems. HFS+'s case-folding does more than just fold uppercase into lowercase (which we already handle with strcasecmp). It may also skip past certain "ignored" Unicode code points, so that (for example) ".gi\u200ct" is mapped to ".git". The full list of folds can be found in the tables at: https://www.opensource.apple.com/source/xnu/xnu-1504.15.3/bsd/hfs/hfscommon/Unicode/UCStringCompareData.h Implementing a full "is this path the same as that path" comparison would require us importing the whole set of tables. However, what we want to do is much simpler: we only care about checking ".git". We know that 'G' is the only thing that folds to 'g', and so on, so we really only need to deal with the set of ignored code points, which is much smaller. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- utf8.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ utf8.h | 8 ++++++++ 2 files changed, 72 insertions(+) diff --git a/utf8.c b/utf8.c index 0d20e0acb2..2c6442cc11 100644 --- a/utf8.c +++ b/utf8.c @@ -628,3 +628,67 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding) return chrlen; } + +/* + * Pick the next char from the stream, folding as an HFS+ filename comparison + * would. Note that this is _not_ complete by any means. It's just enough + * to make is_hfs_dotgit() work, and should not be used otherwise. + */ +static ucs_char_t next_hfs_char(const char **in) +{ + while (1) { + ucs_char_t out = pick_one_utf8_char(in, NULL); + /* + * check for malformed utf8. Technically this + * gets converted to a percent-sequence, but + * returning 0 is good enough for is_hfs_dotgit + * to realize it cannot be .git + */ + if (!*in) + return 0; + + /* these code points are ignored completely */ + switch (out) { + case 0x200c: /* ZERO WIDTH NON-JOINER */ + case 0x200d: /* ZERO WIDTH JOINER */ + case 0x200e: /* LEFT-TO-RIGHT MARK */ + case 0x200f: /* RIGHT-TO-LEFT MARK */ + case 0x202a: /* LEFT-TO-RIGHT EMBEDDING */ + case 0x202b: /* RIGHT-TO-LEFT EMBEDDING */ + case 0x202c: /* POP DIRECTIONAL FORMATTING */ + case 0x202d: /* LEFT-TO-RIGHT OVERRIDE */ + case 0x202e: /* RIGHT-TO-LEFT OVERRIDE */ + case 0x206a: /* INHIBIT SYMMETRIC SWAPPING */ + case 0x206b: /* ACTIVATE SYMMETRIC SWAPPING */ + case 0x206c: /* INHIBIT ARABIC FORM SHAPING */ + case 0x206d: /* ACTIVATE ARABIC FORM SHAPING */ + case 0x206e: /* NATIONAL DIGIT SHAPES */ + case 0x206f: /* NOMINAL DIGIT SHAPES */ + case 0xfeff: /* ZERO WIDTH NO-BREAK SPACE */ + continue; + } + + /* + * there's a great deal of other case-folding that occurs, + * but this is enough to catch anything that will convert + * to ".git" + */ + return tolower(out); + } +} + +int is_hfs_dotgit(const char *path) +{ + ucs_char_t c; + + if 
(next_hfs_char(&path) != '.' || + next_hfs_char(&path) != 'g' || + next_hfs_char(&path) != 'i' || + next_hfs_char(&path) != 't') + return 0; + c = next_hfs_char(&path); + if (c && !is_dir_sep(c)) + return 0; + + return 1; +} diff --git a/utf8.h b/utf8.h index 65d0e42b96..e4d9183c5f 100644 --- a/utf8.h +++ b/utf8.h @@ -42,4 +42,12 @@ static inline char *reencode_string(const char *in, int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding); +/* + * Returns true if the path would match ".git" after HFS case-folding. + * The path should be NUL-terminated, but we will match variants of both ".git\0" + * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck + * and verify_path(). + */ +int is_hfs_dotgit(const char *path); + #endif -- cgit v1.2.3 From a42643aa8d88a2278acad2da6bc702e426476e9b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 15 Dec 2014 18:15:20 -0500 Subject: read-cache: optionally disallow HFS+ .git variants The point of disallowing ".git" in the index is that we would never want to accidentally overwrite files in the repository directory. But this means we need to respect the filesystem's idea of when two paths are equal. The prior commit added a helper to make such a comparison for HFS+; let's use it in verify_path. We make this check optional for two reasons: 1. It restricts the set of allowable filenames, which is unnecessary for people who are not on HFS+. In practice this probably doesn't matter, though, as the restricted names are rather obscure and almost certainly would never come up in practice. 2. It has a minor performance penalty for every path we insert into the index. This patch ties the check to the core.protectHFS config option. Though this is expected to be most useful on OS X, we allow it to be set everywhere, as HFS+ may be mounted on other platforms. The variable does default to on for OS X, though. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/config.txt | 5 +++++ cache.h | 1 + config.c | 5 +++++ config.mak.uname | 1 + environment.c | 5 +++++ read-cache.c | 3 +++ t/t1014-read-tree-confusing.sh | 24 ++++++++++++++++++++---- t/test-lib.sh | 6 +++++- 8 files changed, 45 insertions(+), 5 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index ab26963d61..0677bd8df5 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -234,6 +234,11 @@ core.precomposeunicode:: When false, file names are handled fully transparent by Git, which is backward compatible with older versions of Git. +core.protectHFS:: + If set to true, do not allow checkout of paths that would + be considered equivalent to `.git` on an HFS+ filesystem. + Defaults to `true` on Mac OS, and `false` elsewhere. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/cache.h b/cache.h index ce377e1354..b600a0c3e4 100644 --- a/cache.h +++ b/cache.h @@ -584,6 +584,7 @@ extern int fsync_object_files; extern int core_preload_index; extern int core_apply_sparse_checkout; extern int precomposed_unicode; +extern int protect_hfs; /* * The character that begins a commented line in user-editable file diff --git a/config.c b/config.c index e1d66a145b..b519cedc01 100644 --- a/config.c +++ b/config.c @@ -881,6 +881,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.protecthfs")) { + protect_hfs = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. 
*/ return 0; } diff --git a/config.mak.uname b/config.mak.uname index 82d549e48b..23af148837 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -97,6 +97,7 @@ ifeq ($(uname_S),Darwin) HAVE_DEV_TTY = YesPlease COMPAT_OBJS += compat/precompose_utf8.o BASIC_CFLAGS += -DPRECOMPOSE_UNICODE + BASIC_CFLAGS += -DPROTECT_HFS_DEFAULT=1 endif ifeq ($(uname_S),SunOS) NEEDS_SOCKET = YesPlease diff --git a/environment.c b/environment.c index 0a15349cfe..828b574a29 100644 --- a/environment.c +++ b/environment.c @@ -63,6 +63,11 @@ int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ struct startup_info *startup_info; unsigned long pack_size_limit_cfg; +#ifndef PROTECT_HFS_DEFAULT +#define PROTECT_HFS_DEFAULT 0 +#endif +int protect_hfs = PROTECT_HFS_DEFAULT; + /* * The character that begins a commented line in user-editable file * that is subject to stripspace. diff --git a/read-cache.c b/read-cache.c index 122be494f3..7f48a08c15 100644 --- a/read-cache.c +++ b/read-cache.c @@ -14,6 +14,7 @@ #include "resolve-undo.h" #include "strbuf.h" #include "varint.h" +#include "utf8.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really); @@ -786,6 +787,8 @@ int verify_path(const char *path) return 1; if (is_dir_sep(c)) { inside: + if (protect_hfs && is_hfs_dotgit(path)) + return 0; c = *path++; if ((c == '.' 
&& !verify_dotfile(path)) || is_dir_sep(c) || c == '\0') diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index eff8aedf7a..ec310d5938 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -11,23 +11,39 @@ test_expect_success 'create base tree' ' tree=$(git rev-parse HEAD^{tree}) ' -while read path; do - test_expect_success "reject $path at end of path" ' +test_expect_success 'enable core.protectHFS for rejection tests' ' + git config core.protectHFS true +' + +while read path pretty; do + : ${pretty:=$path} + test_expect_success "reject $pretty at end of path" ' printf "100644 blob %s\t%s" "$blob" "$path" >tree && bogus=$(git mktree tree && bogus=$(git mktree tree && + ok=$(git mktree Date: Mon, 15 Dec 2014 18:21:57 -0500 Subject: fsck: complain about HFS+ ".git" aliases in trees Now that the index can block pathnames that case-fold to ".git" on HFS+, it would be helpful for fsck to notice such problematic paths. This lets servers which use receive.fsckObjects block them before the damage spreads. Note that the fsck check is always on, even for systems without core.protectHFS set. This is technically more restrictive than we need to be, as a set of users on ext4 could happily use these odd filenames without caring about HFS+. However, on balance, it's helpful for all servers to block these (because the paths can be used for mischief, and servers which bother to fsck would want to stop the spread whether they are on HFS+ themselves or not), and hardly anybody will be affected (because the blocked names are variants of .git with invisible Unicode code-points mixed in, meaning mischief is almost certainly what the tree author had in mind). Ideally these would be controlled by a separate "fsck.protectHFS" flag. However, it would be much nicer to be able to enable/disable _any_ fsck flag individually, and any scheme we choose should match such a system. 
Given the likelihood of anybody using such a path in practice, it is not unreasonable to wait until such a system materializes. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 3 ++- t/t1450-fsck.sh | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fsck.c b/fsck.c index 918bf9a318..b49113bf0e 100644 --- a/fsck.c +++ b/fsck.c @@ -6,6 +6,7 @@ #include "commit.h" #include "tag.h" #include "fsck.h" +#include "utf8.h" static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data) { @@ -175,7 +176,7 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) has_dot = 1; if (!strcmp(name, "..")) has_dotdot = 1; - if (!strcasecmp(name, ".git")) + if (!strcasecmp(name, ".git") || is_hfs_dotgit(name)) has_dotgit = 1; has_zero_pad |= *(char *)desc.buffer == '0'; update_tree_entry(&desc); diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 0438712553..8158b98e6f 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -237,9 +237,10 @@ test_expect_success 'fsck notices submodule entry pointing to null sha1' ' ) ' -while read name path; do +while read name path pretty; do while read mode type; do - test_expect_success "fsck notices $path as $type" ' + : ${pretty:=$path} + test_expect_success "fsck notices $pretty as $type" ' ( git init $name-$type && cd $name-$type && @@ -259,11 +260,12 @@ while read name path; do 100644 blob 040000 tree EOF -done <<-\EOF +done <<-EOF dot . dotdot .. dotgit .git dotgit-case .GIT +dotgit-unicode .gI${u200c}T .gI{u200c}T EOF test_done -- cgit v1.2.3 From 1d1d69bc52dcc7def5b2edbd165cc0a4e3911c8e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 16 Dec 2014 23:31:03 +0100 Subject: path: add is_ntfs_dotgit() helper We do not allow paths with a ".git" component to be added to the index, as that would mean repository contents could overwrite our repository files. However, asking "is this path the same as .git" is not as simple as strcmp() on some filesystems. 
On NTFS (and FAT32), there exist so-called "short names" for backwards-compatibility: 8.3 compliant names that refer to the same files as their long names. As ".git" is not an 8.3 compliant name, a short name is generated automatically, typically "git~1". Depending on the Windows version, any combination of trailing spaces and periods is ignored, too, so that both "git~1." and ".git." still refer to the Git directory. The reason is that 8.3 stores file names shorter than 8 characters with trailing spaces. So literally, it does not matter for the short name whether it is padded with spaces or whether it is shorter than 8 characters; it is considered to be exactly the same. The period is the separator between file name and file extension, and again, an empty extension consists just of spaces in 8.3 format. So technically, we would only need to take care of the equivalent of this regex: (\.git {0,4}|git~1 {0,3})\. {0,3} However, there are indications that at least some Windows versions might be more lenient and accept arbitrary combinations of trailing spaces and periods and strip them out. So we're playing it real safe here. Besides, there can be little doubt about the intention behind using file names matching even the more lenient pattern specified above, therefore we should be fine with disallowing such patterns. Extra care is taken to catch names such as '.\\.git\\booh' because the backslash is marked as a directory separator only on Windows, and we want to use this new helper function also in fsck on other platforms. A big thank you goes to Ed Thomson and an unnamed Microsoft engineer for the detailed analysis performed to come up with the corresponding fixes for libgit2. This commit adds a function to detect whether a given file name can refer to the Git directory by mistake. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- cache.h | 1 + path.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/cache.h b/cache.h index b600a0c3e4..d17b1d6295 100644 --- a/cache.h +++ b/cache.h @@ -759,6 +759,7 @@ int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); int offset_1st_component(const char *path); +extern int is_ntfs_dotgit(const char *name); /* object replacement */ #define READ_SHA1_FILE_REPLACE 1 diff --git a/path.c b/path.c index 24594c4112..4ef1b01e05 100644 --- a/path.c +++ b/path.c @@ -830,3 +830,36 @@ int offset_1st_component(const char *path) return 2 + is_dir_sep(path[2]); return is_dir_sep(path[0]); } + +static int only_spaces_and_periods(const char *path, size_t len, size_t skip) +{ + if (len < skip) + return 0; + len -= skip; + path += skip; + while (len-- > 0) { + char c = *(path++); + if (c != ' ' && c != '.') + return 0; + } + return 1; +} + +int is_ntfs_dotgit(const char *name) +{ + int len; + + for (len = 0; ; len++) + if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { + if (only_spaces_and_periods(name, len, 4) && + !strncasecmp(name, ".git", 4)) + return 1; + if (only_spaces_and_periods(name, len, 5) && + !strncasecmp(name, "git~1", 5)) + return 1; + if (name[len] != '\\') + return 0; + name += len + 1; + len = -1; + } +} -- cgit v1.2.3 From 2b4c6efc82119ba8f4169717473d95d1a89e4c69 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 16 Dec 2014 23:46:59 +0100 Subject: read-cache: optionally disallow NTFS .git variants The point of disallowing ".git" in the index is that we would never want to accidentally overwrite files in the repository directory. But this means we need to respect the filesystem's idea of when two paths are equal. 
The prior commit added a helper to make such a comparison for NTFS and FAT32; let's use it in verify_path(). We make this check optional for two reasons: 1. It restricts the set of allowable filenames, which is unnecessary for people who are not on NTFS nor FAT32. In practice this probably doesn't matter, though, as the restricted names are rather obscure and almost certainly would never come up in practice. 2. It has a minor performance penalty for every path we insert into the index. This patch ties the check to the core.protectNTFS config option. Though this is expected to be most useful on Windows, we allow it to be set everywhere, as NTFS may be mounted on other platforms. The variable does default to on for Windows, though. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config.txt | 6 ++++++ cache.h | 1 + config.c | 5 +++++ config.mak.uname | 2 ++ environment.c | 5 +++++ read-cache.c | 2 ++ t/t1014-read-tree-confusing.sh | 13 +++++++++++++ 7 files changed, 34 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index 0677bd8df5..097fdd47e1 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -239,6 +239,12 @@ core.protectHFS:: be considered equivalent to `.git` on an HFS+ filesystem. Defaults to `true` on Mac OS, and `false` elsewhere. +core.protectNTFS:: + If set to true, do not allow checkout of paths that would + cause problems with the NTFS filesystem, e.g. conflict with + 8.3 "short" names. + Defaults to `true` on Windows, and `false` elsewhere. 
+ core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/cache.h b/cache.h index d17b1d6295..29ed24b802 100644 --- a/cache.h +++ b/cache.h @@ -585,6 +585,7 @@ extern int core_preload_index; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; +extern int protect_ntfs; /* * The character that begins a commented line in user-editable file diff --git a/config.c b/config.c index b519cedc01..2cd64b6e3a 100644 --- a/config.c +++ b/config.c @@ -886,6 +886,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.protectntfs")) { + protect_ntfs = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/config.mak.uname b/config.mak.uname index 23af148837..ec7ed7ac3b 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -362,6 +362,7 @@ ifeq ($(uname_S),Windows) EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib PTHREAD_LIBS = lib = + BASIC_CFLAGS += -DPROTECT_NTFS_DEFAULT=1 ifndef DEBUG BASIC_CFLAGS += -GL -Os -MT BASIC_LDFLAGS += -LTCG @@ -506,6 +507,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) COMPAT_OBJS += compat/mingw.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/dirent.o + BASIC_CFLAGS += -DPROTECT_NTFS_DEFAULT=1 BASIC_LDFLAGS += -Wl,--large-address-aware EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/environment.c b/environment.c index 828b574a29..184748da3e 100644 --- a/environment.c +++ b/environment.c @@ -68,6 +68,11 @@ unsigned long pack_size_limit_cfg; #endif int protect_hfs = PROTECT_HFS_DEFAULT; +#ifndef PROTECT_NTFS_DEFAULT +#define PROTECT_NTFS_DEFAULT 0 +#endif +int protect_ntfs = PROTECT_NTFS_DEFAULT; + /* * The character that begins a commented line in user-editable file * that is subject to stripspace. 
diff --git a/read-cache.c b/read-cache.c index 7f48a08c15..4fa208b662 100644 --- a/read-cache.c +++ b/read-cache.c @@ -789,6 +789,8 @@ int verify_path(const char *path) inside: if (protect_hfs && is_hfs_dotgit(path)) return 0; + if (protect_ntfs && is_ntfs_dotgit(path)) + return 0; c = *path++; if ((c == '.' && !verify_dotfile(path)) || is_dir_sep(c) || c == '\0') diff --git a/t/t1014-read-tree-confusing.sh b/t/t1014-read-tree-confusing.sh index ec310d5938..2f5a25d503 100755 --- a/t/t1014-read-tree-confusing.sh +++ b/t/t1014-read-tree-confusing.sh @@ -15,8 +15,17 @@ test_expect_success 'enable core.protectHFS for rejection tests' ' git config core.protectHFS true ' +test_expect_success 'enable core.protectNTFS for rejection tests' ' + git config core.protectNTFS true +' + while read path pretty; do : ${pretty:=$path} + case "$path" in + *SPACE) + path="${path%SPACE} " + ;; + esac test_expect_success "reject $pretty at end of path" ' printf "100644 blob %s\t%s" "$blob" "$path" >tree && bogus=$(git mktree Date: Wed, 10 Dec 2014 22:28:27 +0100 Subject: fsck: complain about NTFS ".git" aliases in trees Now that the index can block pathnames that can be mistaken to mean ".git" on NTFS and FAT32, it would be helpful for fsck to notice such problematic paths. This lets servers which use receive.fsckObjects block them before the damage spreads. Note that the fsck check is always on, even for systems without core.protectNTFS set. This is technically more restrictive than we need to be, as a set of users on ext4 could happily use these odd filenames without caring about NTFS. However, on balance, it's helpful for all servers to block these (because the paths can be used for mischief, and servers which bother to fsck would want to stop the spread whether they are on NTFS themselves or not), and hardly anybody will be affected (because the blocked names are variants of .git or git~1, meaning mischief is almost certainly what the tree author had in mind). 
Ideally these would be controlled by a separate "fsck.protectNTFS" flag. However, it would be much nicer to be able to enable/disable _any_ fsck flag individually, and any scheme we choose should match such a system. Given the likelihood of anybody using such a path in practice, it is not unreasonable to wait until such a system materializes. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- fsck.c | 3 ++- t/t1450-fsck.sh | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fsck.c b/fsck.c index b49113bf0e..0b76de6f68 100644 --- a/fsck.c +++ b/fsck.c @@ -176,7 +176,8 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) has_dot = 1; if (!strcmp(name, "..")) has_dotdot = 1; - if (!strcasecmp(name, ".git") || is_hfs_dotgit(name)) + if (!strcasecmp(name, ".git") || is_hfs_dotgit(name) || + is_ntfs_dotgit(name)) has_dotgit = 1; has_zero_pad |= *(char *)desc.buffer == '0'; update_tree_entry(&desc); diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 8158b98e6f..6edd99a81e 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -251,10 +251,10 @@ while read name path pretty; do tree=$(git rev-parse HEAD^{tree}) && value=$(eval "echo \$$type") && printf "$mode $type %s\t%s" "$value" "$path" >bad && - git mktree out && cat out && - grep "warning.*\\." out + grep "warning.*tree $bad_tree" out )' done <<-\EOF 100644 blob @@ -266,6 +266,11 @@ dotdot .. dotgit .git dotgit-case .GIT dotgit-unicode .gI${u200c}T .gI{u200c}T +dotgit-case2 .Git +git-tilde1 git~1 +dotgitdot .git. +dot-backslash-case .\\\\.GIT\\\\foobar +dotgit-case-backslash .git\\\\foobar EOF test_done -- cgit v1.2.3