diff options
author | Junio C Hamano <gitster@pobox.com> | 2015-08-03 11:01:13 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2015-08-03 11:01:13 -0700 |
commit | 54d673f25d571ac2d8f50b1ccea4a7d69ca8a056 (patch) | |
tree | 2357d79569f29fa36a998dcf723756e015a12f8d | |
parent | Merge branch 'cb/parse-magnitude' (diff) | |
parent | read_gitfile_gently: fix use-after-free (diff) | |
download | tgif-54d673f25d571ac2d8f50b1ccea4a7d69ca8a056.tar.xz |
Merge branch 'ee/clean-remove-dirs'
Replace "is this subdirectory a separate repository that should not
be touched?" check "git clean" does by checking if it has .git/HEAD
using the submodule-related code with a more optimized check.
* ee/clean-remove-dirs:
read_gitfile_gently: fix use-after-free
clean: improve performance when removing lots of directories
p7300: add performance tests for clean
t7300: add tests to document behavior of clean and nested git
setup: sanity check file size in read_gitfile_gently
setup: add gentle version of read_gitfile
-rw-r--r-- | builtin/clean.c | 30 | ||||
-rw-r--r-- | cache.h | 12 | ||||
-rw-r--r-- | setup.c | 89 | ||||
-rwxr-xr-x | t/perf/p7300-clean.sh | 31 | ||||
-rwxr-xr-x | t/t7300-clean.sh | 140 |
5 files changed, 277 insertions, 25 deletions
diff --git a/builtin/clean.c b/builtin/clean.c index 6dcb72e644..df53def63f 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -10,7 +10,6 @@ #include "cache.h" #include "dir.h" #include "parse-options.h" -#include "refs.h" #include "string-list.h" #include "quote.h" #include "column.h" @@ -148,6 +147,31 @@ static int exclude_cb(const struct option *opt, const char *arg, int unset) return 0; } +/* + * Return 1 if the given path is the root of a git repository or + * submodule else 0. Will not return 1 for bare repositories with the + * exception of creating a bare repository in "foo/.git" and calling + * is_git_repository("foo"). + */ +static int is_git_repository(struct strbuf *path) +{ + int ret = 0; + int gitfile_error; + size_t orig_path_len = path->len; + assert(orig_path_len != 0); + if (path->buf[orig_path_len - 1] != '/') + strbuf_addch(path, '/'); + strbuf_addstr(path, ".git"); + if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf)) + ret = 1; + if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED || + gitfile_error == READ_GITFILE_ERR_READ_FAILED) + ret = 1; /* This could be a real .git file, take the + * safe option and avoid cleaning */ + strbuf_setlen(path, orig_path_len); + return ret; +} + static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, int dry_run, int quiet, int *dir_gone) { @@ -155,13 +179,11 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, struct strbuf quoted = STRBUF_INIT; struct dirent *e; int res = 0, ret = 0, gone = 1, original_len = path->len, len; - unsigned char submodule_head[20]; struct string_list dels = STRING_LIST_INIT_DUP; *dir_gone = 1; - if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && - !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) { + if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_git_repository(path)) { if (!quiet) { quote_path_relative(path->buf, prefix, "ed); printf(dry_run ? _(msg_would_skip_git_dir) : _(msg_skip_git_dir), @@ -447,7 +447,17 @@ extern int get_common_dir(struct strbuf *sb, const char *gitdir); extern const char *get_git_namespace(void); extern const char *strip_namespace(const char *namespaced_ref); extern const char *get_git_work_tree(void); -extern const char *read_gitfile(const char *path); + +#define READ_GITFILE_ERR_STAT_FAILED 1 +#define READ_GITFILE_ERR_NOT_A_FILE 2 +#define READ_GITFILE_ERR_OPEN_FAILED 3 +#define READ_GITFILE_ERR_READ_FAILED 4 +#define READ_GITFILE_ERR_INVALID_FORMAT 5 +#define READ_GITFILE_ERR_NO_PATH 6 +#define READ_GITFILE_ERR_NOT_A_REPO 7 +#define READ_GITFILE_ERR_TOO_LARGE 8 +extern const char *read_gitfile_gently(const char *path, int *return_error_code); +#define read_gitfile(path) read_gitfile_gently((path), NULL) extern const char *resolve_gitdir(const char *suspect); extern void set_git_work_tree(const char *tree); @@ -411,35 +411,58 @@ static void update_linked_gitdir(const char *gitfile, const char *gitdir) /* * Try to read the location of the git directory from the .git file, * return path to git directory if found. + * + * On failure, if return_error_code is not NULL, return_error_code + * will be set to an error code and NULL will be returned. If + * return_error_code is NULL the function will die instead (for most + * cases). */ -const char *read_gitfile(const char *path) +const char *read_gitfile_gently(const char *path, int *return_error_code) { - char *buf; - char *dir; + const int max_file_size = 1 << 20; /* 1MB */ + int error_code = 0; + char *buf = NULL; + char *dir = NULL; const char *slash; struct stat st; int fd; ssize_t len; - if (stat(path, &st)) - return NULL; - if (!S_ISREG(st.st_mode)) - return NULL; + if (stat(path, &st)) { + error_code = READ_GITFILE_ERR_STAT_FAILED; + goto cleanup_return; + } + if (!S_ISREG(st.st_mode)) { + error_code = READ_GITFILE_ERR_NOT_A_FILE; + goto cleanup_return; + } + if (st.st_size > max_file_size) { + error_code = READ_GITFILE_ERR_TOO_LARGE; + goto cleanup_return; + } fd = open(path, O_RDONLY); - if (fd < 0) - die_errno("Error opening '%s'", path); + if (fd < 0) { + error_code = READ_GITFILE_ERR_OPEN_FAILED; + goto cleanup_return; + } buf = xmalloc(st.st_size + 1); len = read_in_full(fd, buf, st.st_size); close(fd); - if (len != st.st_size) - die("Error reading %s", path); + if (len != st.st_size) { + error_code = READ_GITFILE_ERR_READ_FAILED; + goto cleanup_return; + } buf[len] = '\0'; - if (!starts_with(buf, "gitdir: ")) - die("Invalid gitfile format: %s", path); + if (!starts_with(buf, "gitdir: ")) { + error_code = READ_GITFILE_ERR_INVALID_FORMAT; + goto cleanup_return; + } while (buf[len - 1] == '\n' || buf[len - 1] == '\r') len--; - if (len < 9) - die("No path in gitfile: %s", path); + if (len < 9) { + error_code = READ_GITFILE_ERR_NO_PATH; + goto cleanup_return; + } buf[len] = '\0'; dir = buf + 8; @@ -453,15 +476,41 @@ const char *read_gitfile(const char *path) free(buf); buf = dir; } - - if (!is_git_directory(dir)) - die("Not a git repository: %s", dir); - + if (!is_git_directory(dir)) { + error_code = READ_GITFILE_ERR_NOT_A_REPO; + goto cleanup_return; + } update_linked_gitdir(path, dir); path = real_path(dir); +cleanup_return: + if (return_error_code) + *return_error_code = error_code; + else if (error_code) { + switch (error_code) { + case READ_GITFILE_ERR_STAT_FAILED: + case READ_GITFILE_ERR_NOT_A_FILE: + /* non-fatal; follow return path */ + break; + case READ_GITFILE_ERR_OPEN_FAILED: + die_errno("Error opening '%s'", path); + case READ_GITFILE_ERR_TOO_LARGE: + die("Too large to be a .git file: '%s'", path); + case READ_GITFILE_ERR_READ_FAILED: + die("Error reading %s", path); + case READ_GITFILE_ERR_INVALID_FORMAT: + die("Invalid gitfile format: %s", path); + case READ_GITFILE_ERR_NO_PATH: + die("No path in gitfile: %s", path); + case READ_GITFILE_ERR_NOT_A_REPO: + die("Not a git repository: %s", dir); + default: + assert(0); + } + } + free(buf); - return path; + return error_code ? NULL : path; } static const char *setup_explicit_git_dir(const char *gitdirenv, diff --git a/t/perf/p7300-clean.sh b/t/perf/p7300-clean.sh new file mode 100755 index 0000000000..ec94cdd657 --- /dev/null +++ b/t/perf/p7300-clean.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +test_description="Test git-clean performance" + +. ./perf-lib.sh + +test_perf_default_repo +test_checkout_worktree + +test_expect_success 'setup untracked directory with many sub dirs' ' + rm -rf 500_sub_dirs 100000_sub_dirs clean_test_dir && + mkdir 500_sub_dirs 100000_sub_dirs clean_test_dir && + for i in $(test_seq 1 500) + do + mkdir 500_sub_dirs/dir$i || return $? + done && + for i in $(test_seq 1 200) + do + cp -r 500_sub_dirs 100000_sub_dirs/dir$i || return $? + done +' + +test_perf 'clean many untracked sub dirs, check for nested git' ' + git clean -n -q -f -d 100000_sub_dirs/ +' + +test_perf 'clean many untracked sub dirs, ignore nested git' ' + git clean -n -q -f -f -d 100000_sub_dirs/ +' + +test_done diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 99be5d95d0..32e96da7e3 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -455,6 +455,146 @@ test_expect_success 'nested git work tree' ' ! test -d bar ' +test_expect_success 'should clean things that almost look like git but are not' ' + rm -fr almost_git almost_bare_git almost_submodule && + mkdir -p almost_git/.git/objects && + mkdir -p almost_git/.git/refs && + cat >almost_git/.git/HEAD <<-\EOF && + garbage + EOF + cp -r almost_git/.git/ almost_bare_git && + mkdir almost_submodule/ && + cat >almost_submodule/.git <<-\EOF && + garbage + EOF + test_when_finished "rm -rf almost_*" && + git clean -f -d && + test_path_is_missing almost_git && + test_path_is_missing almost_bare_git && + test_path_is_missing almost_submodule +' + +test_expect_success 'should not clean submodules' ' + rm -fr repo to_clean sub1 sub2 && + mkdir repo to_clean && + ( + cd repo && + git init && + test_commit msg hello.world + ) && + git submodule add ./repo/.git sub1 && + git commit -m "sub1" && + git branch before_sub2 && + git submodule add ./repo/.git sub2 && + git commit -m "sub2" && + git checkout before_sub2 && + >to_clean/should_clean.this && + git clean -f -d && + test_path_is_file repo/.git/index && + test_path_is_file repo/hello.world && + test_path_is_file sub1/.git && + test_path_is_file sub1/hello.world && + test_path_is_file sub2/.git && + test_path_is_file sub2/hello.world && + test_path_is_missing to_clean +' + +test_expect_success 'should avoid cleaning possible submodules' ' + rm -fr to_clean possible_sub1 && + mkdir to_clean possible_sub1 && + test_when_finished "rm -rf possible_sub*" && + echo "gitdir: foo" >possible_sub1/.git && + >possible_sub1/hello.world && + chmod 0 possible_sub1/.git && + >to_clean/should_clean.this && + git clean -f -d && + test_path_is_file possible_sub1/.git && + test_path_is_file possible_sub1/hello.world && + test_path_is_missing to_clean +' + +test_expect_success 'nested (empty) git should be kept' ' + rm -fr empty_repo to_clean && + git init empty_repo && + mkdir to_clean && + >to_clean/should_clean.this && + git clean -f -d && + test_path_is_file empty_repo/.git/HEAD && + test_path_is_missing to_clean +' + +test_expect_success 'nested bare repositories should be cleaned' ' + rm -fr bare1 bare2 subdir && + git init --bare bare1 && + git clone --local --bare . bare2 && + mkdir subdir && + cp -r bare2 subdir/bare3 && + git clean -f -d && + test_path_is_missing bare1 && + test_path_is_missing bare2 && + test_path_is_missing subdir +' + +test_expect_failure 'nested (empty) bare repositories should be cleaned even when in .git' ' + rm -fr strange_bare && + mkdir strange_bare && + git init --bare strange_bare/.git && + git clean -f -d && + test_path_is_missing strange_bare +' + +test_expect_failure 'nested (non-empty) bare repositories should be cleaned even when in .git' ' + rm -fr strange_bare && + mkdir strange_bare && + git clone --local --bare . strange_bare/.git && + git clean -f -d && + test_path_is_missing strange_bare +' + +test_expect_success 'giving path in nested git work tree will remove it' ' + rm -fr repo && + mkdir repo && + ( + cd repo && + git init && + mkdir -p bar/baz && + test_commit msg bar/baz/hello.world + ) && + git clean -f -d repo/bar/baz && + test_path_is_file repo/.git/HEAD && + test_path_is_dir repo/bar/ && + test_path_is_missing repo/bar/baz +' + +test_expect_success 'giving path to nested .git will not remove it' ' + rm -fr repo && + mkdir repo untracked && + ( + cd repo && + git init && + test_commit msg hello.world + ) && + git clean -f -d repo/.git && + test_path_is_file repo/.git/HEAD && + test_path_is_dir repo/.git/refs && + test_path_is_dir repo/.git/objects && + test_path_is_dir untracked/ +' + +test_expect_success 'giving path to nested .git/ will remove contents' ' + rm -fr repo untracked && + mkdir repo untracked && + ( + cd repo && + git init && + test_commit msg hello.world + ) && + git clean -f -d repo/.git/ && + test_path_is_dir repo/.git && + test_dir_is_empty repo/.git && + test_path_is_dir untracked/ +' + test_expect_success 'force removal of nested git work tree' ' rm -fr foo bar baz && mkdir -p foo bar baz/boo && |