summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2015-08-03 11:01:13 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2015-08-03 11:01:13 -0700
commit54d673f25d571ac2d8f50b1ccea4a7d69ca8a056 (patch)
tree2357d79569f29fa36a998dcf723756e015a12f8d
parentMerge branch 'cb/parse-magnitude' (diff)
parentread_gitfile_gently: fix use-after-free (diff)
downloadtgif-54d673f25d571ac2d8f50b1ccea4a7d69ca8a056.tar.xz
Merge branch 'ee/clean-remove-dirs'
Replace "is this subdirectory a separate repository that should not be touched?" check "git clean" does by checking if it has .git/HEAD using the submodule-related code with a more optimized check. * ee/clean-remove-dirs: read_gitfile_gently: fix use-after-free clean: improve performance when removing lots of directories p7300: add performance tests for clean t7300: add tests to document behavior of clean and nested git setup: sanity check file size in read_gitfile_gently setup: add gentle version of read_gitfile
-rw-r--r--builtin/clean.c30
-rw-r--r--cache.h12
-rw-r--r--setup.c89
-rwxr-xr-xt/perf/p7300-clean.sh31
-rwxr-xr-xt/t7300-clean.sh140
5 files changed, 277 insertions, 25 deletions
diff --git a/builtin/clean.c b/builtin/clean.c
index 6dcb72e644..df53def63f 100644
--- a/builtin/clean.c
+++ b/builtin/clean.c
@@ -10,7 +10,6 @@
#include "cache.h"
#include "dir.h"
#include "parse-options.h"
-#include "refs.h"
#include "string-list.h"
#include "quote.h"
#include "column.h"
@@ -148,6 +147,31 @@ static int exclude_cb(const struct option *opt, const char *arg, int unset)
return 0;
}
+/*
+ * Return 1 if the given path is the root of a git repository or
+ * submodule else 0. Will not return 1 for bare repositories with the
+ * exception of creating a bare repository in "foo/.git" and calling
+ * is_git_repository("foo").
+ */
+static int is_git_repository(struct strbuf *path)
+{
+ int ret = 0;
+ int gitfile_error;
+ size_t orig_path_len = path->len;
+ assert(orig_path_len != 0);
+ if (path->buf[orig_path_len - 1] != '/')
+ strbuf_addch(path, '/');
+ strbuf_addstr(path, ".git");
+ if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf))
+ ret = 1;
+ if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED ||
+ gitfile_error == READ_GITFILE_ERR_READ_FAILED)
+ ret = 1; /* This could be a real .git file, take the
+ * safe option and avoid cleaning */
+ strbuf_setlen(path, orig_path_len);
+ return ret;
+}
+
static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
int dry_run, int quiet, int *dir_gone)
{
@@ -155,13 +179,11 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
struct strbuf quoted = STRBUF_INIT;
struct dirent *e;
int res = 0, ret = 0, gone = 1, original_len = path->len, len;
- unsigned char submodule_head[20];
struct string_list dels = STRING_LIST_INIT_DUP;
*dir_gone = 1;
- if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
- !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {
+ if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_git_repository(path)) {
if (!quiet) {
quote_path_relative(path->buf, prefix, &quoted);
printf(dry_run ? _(msg_would_skip_git_dir) : _(msg_skip_git_dir),
diff --git a/cache.h b/cache.h
index 8b621d7352..dcc9d3f40b 100644
--- a/cache.h
+++ b/cache.h
@@ -447,7 +447,17 @@ extern int get_common_dir(struct strbuf *sb, const char *gitdir);
extern const char *get_git_namespace(void);
extern const char *strip_namespace(const char *namespaced_ref);
extern const char *get_git_work_tree(void);
-extern const char *read_gitfile(const char *path);
+
+#define READ_GITFILE_ERR_STAT_FAILED 1
+#define READ_GITFILE_ERR_NOT_A_FILE 2
+#define READ_GITFILE_ERR_OPEN_FAILED 3
+#define READ_GITFILE_ERR_READ_FAILED 4
+#define READ_GITFILE_ERR_INVALID_FORMAT 5
+#define READ_GITFILE_ERR_NO_PATH 6
+#define READ_GITFILE_ERR_NOT_A_REPO 7
+#define READ_GITFILE_ERR_TOO_LARGE 8
+extern const char *read_gitfile_gently(const char *path, int *return_error_code);
+#define read_gitfile(path) read_gitfile_gently((path), NULL)
extern const char *resolve_gitdir(const char *suspect);
extern void set_git_work_tree(const char *tree);
diff --git a/setup.c b/setup.c
index 82c0cc2a13..5f9f07dcdb 100644
--- a/setup.c
+++ b/setup.c
@@ -411,35 +411,58 @@ static void update_linked_gitdir(const char *gitfile, const char *gitdir)
/*
* Try to read the location of the git directory from the .git file,
* return path to git directory if found.
+ *
+ * On failure, if return_error_code is not NULL, return_error_code
+ * will be set to an error code and NULL will be returned. If
+ * return_error_code is NULL the function will die instead (for most
+ * cases).
*/
-const char *read_gitfile(const char *path)
+const char *read_gitfile_gently(const char *path, int *return_error_code)
{
- char *buf;
- char *dir;
+ const int max_file_size = 1 << 20; /* 1MB */
+ int error_code = 0;
+ char *buf = NULL;
+ char *dir = NULL;
const char *slash;
struct stat st;
int fd;
ssize_t len;
- if (stat(path, &st))
- return NULL;
- if (!S_ISREG(st.st_mode))
- return NULL;
+ if (stat(path, &st)) {
+ error_code = READ_GITFILE_ERR_STAT_FAILED;
+ goto cleanup_return;
+ }
+ if (!S_ISREG(st.st_mode)) {
+ error_code = READ_GITFILE_ERR_NOT_A_FILE;
+ goto cleanup_return;
+ }
+ if (st.st_size > max_file_size) {
+ error_code = READ_GITFILE_ERR_TOO_LARGE;
+ goto cleanup_return;
+ }
fd = open(path, O_RDONLY);
- if (fd < 0)
- die_errno("Error opening '%s'", path);
+ if (fd < 0) {
+ error_code = READ_GITFILE_ERR_OPEN_FAILED;
+ goto cleanup_return;
+ }
buf = xmalloc(st.st_size + 1);
len = read_in_full(fd, buf, st.st_size);
close(fd);
- if (len != st.st_size)
- die("Error reading %s", path);
+ if (len != st.st_size) {
+ error_code = READ_GITFILE_ERR_READ_FAILED;
+ goto cleanup_return;
+ }
buf[len] = '\0';
- if (!starts_with(buf, "gitdir: "))
- die("Invalid gitfile format: %s", path);
+ if (!starts_with(buf, "gitdir: ")) {
+ error_code = READ_GITFILE_ERR_INVALID_FORMAT;
+ goto cleanup_return;
+ }
while (buf[len - 1] == '\n' || buf[len - 1] == '\r')
len--;
- if (len < 9)
- die("No path in gitfile: %s", path);
+ if (len < 9) {
+ error_code = READ_GITFILE_ERR_NO_PATH;
+ goto cleanup_return;
+ }
buf[len] = '\0';
dir = buf + 8;
@@ -453,15 +476,41 @@ const char *read_gitfile(const char *path)
free(buf);
buf = dir;
}
-
- if (!is_git_directory(dir))
- die("Not a git repository: %s", dir);
-
+ if (!is_git_directory(dir)) {
+ error_code = READ_GITFILE_ERR_NOT_A_REPO;
+ goto cleanup_return;
+ }
update_linked_gitdir(path, dir);
path = real_path(dir);
+cleanup_return:
+ if (return_error_code)
+ *return_error_code = error_code;
+ else if (error_code) {
+ switch (error_code) {
+ case READ_GITFILE_ERR_STAT_FAILED:
+ case READ_GITFILE_ERR_NOT_A_FILE:
+ /* non-fatal; follow return path */
+ break;
+ case READ_GITFILE_ERR_OPEN_FAILED:
+ die_errno("Error opening '%s'", path);
+ case READ_GITFILE_ERR_TOO_LARGE:
+ die("Too large to be a .git file: '%s'", path);
+ case READ_GITFILE_ERR_READ_FAILED:
+ die("Error reading %s", path);
+ case READ_GITFILE_ERR_INVALID_FORMAT:
+ die("Invalid gitfile format: %s", path);
+ case READ_GITFILE_ERR_NO_PATH:
+ die("No path in gitfile: %s", path);
+ case READ_GITFILE_ERR_NOT_A_REPO:
+ die("Not a git repository: %s", dir);
+ default:
+ assert(0);
+ }
+ }
+
free(buf);
- return path;
+ return error_code ? NULL : path;
}
static const char *setup_explicit_git_dir(const char *gitdirenv,
diff --git a/t/perf/p7300-clean.sh b/t/perf/p7300-clean.sh
new file mode 100755
index 0000000000..ec94cdd657
--- /dev/null
+++ b/t/perf/p7300-clean.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+test_description="Test git-clean performance"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+test_checkout_worktree
+
+test_expect_success 'setup untracked directory with many sub dirs' '
+ rm -rf 500_sub_dirs 100000_sub_dirs clean_test_dir &&
+ mkdir 500_sub_dirs 100000_sub_dirs clean_test_dir &&
+ for i in $(test_seq 1 500)
+ do
+ mkdir 500_sub_dirs/dir$i || return $?
+ done &&
+ for i in $(test_seq 1 200)
+ do
+ cp -r 500_sub_dirs 100000_sub_dirs/dir$i || return $?
+ done
+'
+
+test_perf 'clean many untracked sub dirs, check for nested git' '
+ git clean -n -q -f -d 100000_sub_dirs/
+'
+
+test_perf 'clean many untracked sub dirs, ignore nested git' '
+ git clean -n -q -f -f -d 100000_sub_dirs/
+'
+
+test_done
diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 99be5d95d0..32e96da7e3 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -455,6 +455,146 @@ test_expect_success 'nested git work tree' '
! test -d bar
'
+test_expect_success 'should clean things that almost look like git but are not' '
+ rm -fr almost_git almost_bare_git almost_submodule &&
+ mkdir -p almost_git/.git/objects &&
+ mkdir -p almost_git/.git/refs &&
+ cat >almost_git/.git/HEAD <<-\EOF &&
+ garbage
+ EOF
+ cp -r almost_git/.git/ almost_bare_git &&
+ mkdir almost_submodule/ &&
+ cat >almost_submodule/.git <<-\EOF &&
+ garbage
+ EOF
+ test_when_finished "rm -rf almost_*" &&
+ git clean -f -d &&
+ test_path_is_missing almost_git &&
+ test_path_is_missing almost_bare_git &&
+ test_path_is_missing almost_submodule
+'
+
+test_expect_success 'should not clean submodules' '
+ rm -fr repo to_clean sub1 sub2 &&
+ mkdir repo to_clean &&
+ (
+ cd repo &&
+ git init &&
+ test_commit msg hello.world
+ ) &&
+ git submodule add ./repo/.git sub1 &&
+ git commit -m "sub1" &&
+ git branch before_sub2 &&
+ git submodule add ./repo/.git sub2 &&
+ git commit -m "sub2" &&
+ git checkout before_sub2 &&
+ >to_clean/should_clean.this &&
+ git clean -f -d &&
+ test_path_is_file repo/.git/index &&
+ test_path_is_file repo/hello.world &&
+ test_path_is_file sub1/.git &&
+ test_path_is_file sub1/hello.world &&
+ test_path_is_file sub2/.git &&
+ test_path_is_file sub2/hello.world &&
+ test_path_is_missing to_clean
+'
+
+test_expect_success 'should avoid cleaning possible submodules' '
+ rm -fr to_clean possible_sub1 &&
+ mkdir to_clean possible_sub1 &&
+ test_when_finished "rm -rf possible_sub*" &&
+ echo "gitdir: foo" >possible_sub1/.git &&
+ >possible_sub1/hello.world &&
+ chmod 0 possible_sub1/.git &&
+ >to_clean/should_clean.this &&
+ git clean -f -d &&
+ test_path_is_file possible_sub1/.git &&
+ test_path_is_file possible_sub1/hello.world &&
+ test_path_is_missing to_clean
+'
+
+test_expect_success 'nested (empty) git should be kept' '
+ rm -fr empty_repo to_clean &&
+ git init empty_repo &&
+ mkdir to_clean &&
+ >to_clean/should_clean.this &&
+ git clean -f -d &&
+ test_path_is_file empty_repo/.git/HEAD &&
+ test_path_is_missing to_clean
+'
+
+test_expect_success 'nested bare repositories should be cleaned' '
+ rm -fr bare1 bare2 subdir &&
+ git init --bare bare1 &&
+ git clone --local --bare . bare2 &&
+ mkdir subdir &&
+ cp -r bare2 subdir/bare3 &&
+ git clean -f -d &&
+ test_path_is_missing bare1 &&
+ test_path_is_missing bare2 &&
+ test_path_is_missing subdir
+'
+
+test_expect_failure 'nested (empty) bare repositories should be cleaned even when in .git' '
+ rm -fr strange_bare &&
+ mkdir strange_bare &&
+ git init --bare strange_bare/.git &&
+ git clean -f -d &&
+ test_path_is_missing strange_bare
+'
+
+test_expect_failure 'nested (non-empty) bare repositories should be cleaned even when in .git' '
+ rm -fr strange_bare &&
+ mkdir strange_bare &&
+ git clone --local --bare . strange_bare/.git &&
+ git clean -f -d &&
+ test_path_is_missing strange_bare
+'
+
+test_expect_success 'giving path in nested git work tree will remove it' '
+ rm -fr repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+ mkdir -p bar/baz &&
+ test_commit msg bar/baz/hello.world
+ ) &&
+ git clean -f -d repo/bar/baz &&
+ test_path_is_file repo/.git/HEAD &&
+ test_path_is_dir repo/bar/ &&
+ test_path_is_missing repo/bar/baz
+'
+
+test_expect_success 'giving path to nested .git will not remove it' '
+ rm -fr repo &&
+ mkdir repo untracked &&
+ (
+ cd repo &&
+ git init &&
+ test_commit msg hello.world
+ ) &&
+ git clean -f -d repo/.git &&
+ test_path_is_file repo/.git/HEAD &&
+ test_path_is_dir repo/.git/refs &&
+ test_path_is_dir repo/.git/objects &&
+ test_path_is_dir untracked/
+'
+
+test_expect_success 'giving path to nested .git/ will remove contents' '
+ rm -fr repo untracked &&
+ mkdir repo untracked &&
+ (
+ cd repo &&
+ git init &&
+ test_commit msg hello.world
+ ) &&
+ git clean -f -d repo/.git/ &&
+ test_path_is_dir repo/.git &&
+ test_dir_is_empty repo/.git &&
+ test_path_is_dir untracked/
+'
+
test_expect_success 'force removal of nested git work tree' '
rm -fr foo bar baz &&
mkdir -p foo bar baz/boo &&