summary | refs | log | tree | commit | diff
path: root/dir.c
diff options
context:
space:
mode:
Diffstat (limited to 'dir.c')
-rw-r--r-- dir.c 442
1 files changed, 362 insertions, 80 deletions
diff --git a/dir.c b/dir.c
index 12d454b89d..b460211e61 100644
--- a/dir.c
+++ b/dir.c
@@ -41,7 +41,8 @@ struct cached_dir {
int nr_files;
int nr_dirs;
- struct dirent *de;
+ const char *d_name;
+ int d_type;
const char *file;
struct untracked_cache_dir *ucd;
};
@@ -50,8 +51,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
struct index_state *istate, const char *path, int len,
struct untracked_cache_dir *untracked,
int check_only, int stop_at_first_file, const struct pathspec *pathspec);
-static int get_dtype(struct dirent *de, struct index_state *istate,
- const char *path, int len);
+static int resolve_dtype(int dtype, struct index_state *istate,
+ const char *path, int len);
int count_slashes(const char *s)
{
@@ -371,13 +372,20 @@ static int match_pathspec_item(const struct index_state *istate,
!ps_strncmp(item, match, name, namelen))
return MATCHED_RECURSIVELY_LEADING_PATHSPEC;
- /* name" doesn't match up to the first wild character */
+ /* name doesn't match up to the first wild character */
if (item->nowildcard_len < item->len &&
ps_strncmp(item, match, name,
item->nowildcard_len - prefix))
return 0;
/*
+ * name has no wildcard, and it didn't match as a leading
+ * pathspec so return.
+ */
+ if (item->nowildcard_len == item->len)
+ return 0;
+
+ /*
* Here is where we would perform a wildmatch to check if
* "name" can be matched as a directory (or a prefix) against
* the pathspec. Since wildmatch doesn't have this capability
@@ -609,6 +617,159 @@ void parse_path_pattern(const char **pattern,
*patternlen = len;
}
+/*
+ * Comparison callback used by the cone-mode pattern hashmaps.
+ *
+ * Both entries are pattern_entry items; their pattern text is compared
+ * over the length of the shorter of the two, ignoring case when
+ * ignore_case is set.  Returns 0 when that common prefix is equal and a
+ * non-zero value otherwise.  The cmp-data and key arguments are not used.
+ */
+int pl_hashmap_cmp(const void *unused_cmp_data,
+		   const struct hashmap_entry *a,
+		   const struct hashmap_entry *b,
+		   const void *key)
+{
+	const struct pattern_entry *lhs =
+		container_of(a, struct pattern_entry, ent);
+	const struct pattern_entry *rhs =
+		container_of(b, struct pattern_entry, ent);
+	size_t cmp_len;
+
+	/* Only the prefix shared by both patterns is compared. */
+	if (lhs->patternlen <= rhs->patternlen)
+		cmp_len = lhs->patternlen;
+	else
+		cmp_len = rhs->patternlen;
+
+	return ignore_case
+		? strncasecmp(lhs->pattern, rhs->pattern, cmp_len)
+		: strncmp(lhs->pattern, rhs->pattern, cmp_len);
+}
+
+/*
+ * Mirror a freshly parsed pattern into the cone-mode hash sets of "pl".
+ *
+ * Nothing happens unless pl->use_cone_patterns is set.  Otherwise "given"
+ * is handled as one of the following (the "root glob" below is the
+ * pattern made of a slash followed by a star):
+ *
+ *  - negative, must-be-dir root glob: clears pl->full_cone;
+ *  - root glob with no flags at all:  sets pl->full_cone;
+ *  - negative pattern ending in slash-star: the text before that suffix
+ *    must already be in recursive_hashmap; it is added to parent_hashmap
+ *    and removed from recursive_hashmap;
+ *  - any other non-negative pattern: added to recursive_hashmap, and if
+ *    the same text is present in parent_hashmap a "repeated" warning is
+ *    printed and it is removed from parent_hashmap.
+ *
+ * Every other shape is reported with warning(), both hash sets are
+ * emptied and pl->use_cone_patterns is cleared.
+ */
+static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given)
+{
+	struct pattern_entry *translated;
+	char *truncated;
+	/*
+	 * NOTE(review): "data" is only ever handed to hashmap_remove() by
+	 * address and is still NULL when freed unless that call writes
+	 * through it -- confirm against the hashmap API whether the removed
+	 * entry should be freed via the call's return value instead.
+	 */
+	char *data = NULL;
+
+	if (!pl->use_cone_patterns)
+		return;
+
+	if ((given->flags & PATTERN_FLAG_NEGATIVE) &&
+	    (given->flags & PATTERN_FLAG_MUSTBEDIR) &&
+	    !strcmp(given->pattern, "/*")) {
+		pl->full_cone = 0;
+		return;
+	}
+
+	if (!given->flags && !strcmp(given->pattern, "/*")) {
+		pl->full_cone = 1;
+		return;
+	}
+
+	if (given->patternlen > 2 &&
+	    !strcmp(given->pattern + given->patternlen - 2, "/*")) {
+		if (!(given->flags & PATTERN_FLAG_NEGATIVE)) {
+			/* Not a cone pattern. */
+			pl->use_cone_patterns = 0;
+			warning(_("unrecognized pattern: '%s'"), given->pattern);
+			goto clear_hashmaps;
+		}
+
+		/* Strip the trailing slash-star to obtain the directory. */
+		truncated = xstrdup(given->pattern);
+		truncated[given->patternlen - 2] = 0;
+
+		translated = xmalloc(sizeof(*translated));
+		translated->pattern = truncated;
+		translated->patternlen = given->patternlen - 2;
+		hashmap_entry_init(&translated->ent,
+				   ignore_case ?
+				   strihash(translated->pattern) :
+				   strhash(translated->pattern));
+
+		if (!hashmap_get_entry(&pl->recursive_hashmap,
+				       translated, ent, NULL)) {
+			/* We did not see the "parent" included */
+			warning(_("unrecognized negative pattern: '%s'"),
+				given->pattern);
+			/* Not inserted anywhere yet, so we still own it. */
+			free(truncated);
+			free(translated);
+			goto clear_hashmaps;
+		}
+
+		/* parent_hashmap takes ownership of "translated" here. */
+		hashmap_add(&pl->parent_hashmap, &translated->ent);
+		hashmap_remove(&pl->recursive_hashmap, &translated->ent, &data);
+		free(data);
+		return;
+	}
+
+	if (given->flags & PATTERN_FLAG_NEGATIVE) {
+		warning(_("unrecognized negative pattern: '%s'"),
+			given->pattern);
+		goto clear_hashmaps;
+	}
+
+	translated = xmalloc(sizeof(*translated));
+
+	translated->pattern = xstrdup(given->pattern);
+	translated->patternlen = given->patternlen;
+	hashmap_entry_init(&translated->ent,
+			   ignore_case ?
+			   strihash(translated->pattern) :
+			   strhash(translated->pattern));
+
+	/* recursive_hashmap takes ownership of "translated" here. */
+	hashmap_add(&pl->recursive_hashmap, &translated->ent);
+
+	if (hashmap_get_entry(&pl->parent_hashmap, translated, ent, NULL)) {
+		/* we already included this at the parent level */
+		warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"),
+			given->pattern);
+		hashmap_remove(&pl->parent_hashmap, &translated->ent, &data);
+		free(data);
+		/*
+		 * "translated" must NOT be freed: it was just inserted into
+		 * recursive_hashmap above, and freeing it would leave a
+		 * dangling entry in that map (use-after-free on the next
+		 * lookup or when the map's entries are freed).
+		 */
+	}
+
+	return;
+
+clear_hashmaps:
+	warning(_("disabling cone pattern matching"));
+	hashmap_free_entries(&pl->parent_hashmap, struct pattern_entry, ent);
+	hashmap_free_entries(&pl->recursive_hashmap, struct pattern_entry, ent);
+	pl->use_cone_patterns = 0;
+}
+
+/*
+ * Report whether "map" has an entry for the text held in "pattern".
+ *
+ * A temporary pattern_entry on the stack is pointed at the strbuf's
+ * contents and hashed (case-insensitively when ignore_case is set) to
+ * serve as the lookup key.  Returns 1 if an entry is found, 0 otherwise.
+ */
+static int hashmap_contains_path(struct hashmap *map,
+				 struct strbuf *pattern)
+{
+	struct pattern_entry probe;
+
+	/* The key borrows the strbuf's storage; nothing is copied. */
+	probe.pattern = pattern->buf;
+	probe.patternlen = pattern->len;
+
+	if (ignore_case)
+		hashmap_entry_init(&probe.ent, strihash(probe.pattern));
+	else
+		hashmap_entry_init(&probe.ent, strhash(probe.pattern));
+
+	return hashmap_get_entry(map, &probe, ent, NULL) ? 1 : 0;
+}
+
+/*
+ * Report whether any leading directory of "path" has an entry in "map".
+ *
+ * "buffer" is scratch space and is overwritten: it receives "path" with a
+ * '/' prepended when one is missing, and is then cut back at its last
+ * '/' repeatedly, looking up each shortened prefix.  The full path itself
+ * is never looked up, and the walk ends once only the leading '/'
+ * remains.  Returns 1 on the first prefix found in "map", 0 if none is.
+ */
+int hashmap_contains_parent(struct hashmap *map,
+			    const char *path,
+			    struct strbuf *buffer)
+{
+	char *last_slash;
+
+	strbuf_setlen(buffer, 0);
+	if (path[0] != '/')
+		strbuf_addch(buffer, '/');
+	strbuf_addstr(buffer, path);
+
+	/* Peel one trailing path component per iteration. */
+	for (last_slash = strrchr(buffer->buf, '/');
+	     last_slash > buffer->buf;
+	     last_slash = strrchr(buffer->buf, '/')) {
+		strbuf_setlen(buffer, last_slash - buffer->buf);
+
+		if (hashmap_contains_path(map, buffer))
+			return 1;
+	}
+
+	return 0;
+}
+
void add_pattern(const char *string, const char *base,
int baselen, struct pattern_list *pl, int srcpos)
{
@@ -633,6 +794,8 @@ void add_pattern(const char *string, const char *base,
ALLOC_GROW(pl->patterns, pl->nr + 1, pl->alloc);
pl->patterns[pl->nr++] = pattern;
pattern->pl = pl;
+
+ add_pattern_to_hashsets(pl, pattern);
}
static int read_skip_worktree_file_from_index(const struct index_state *istate,
@@ -858,6 +1021,9 @@ static int add_patterns_from_buffer(char *buf, size_t size,
int i, lineno = 1;
char *entry;
+ hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0);
+ hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0);
+
pl->filebuf = buf;
if (skip_utf8_bom(&buf, size))
@@ -1050,8 +1216,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname
int prefix = pattern->nowildcardlen;
if (pattern->flags & PATTERN_FLAG_MUSTBEDIR) {
- if (*dtype == DT_UNKNOWN)
- *dtype = get_dtype(NULL, istate, pathname, pathlen);
+ *dtype = resolve_dtype(*dtype, istate, pathname, pathlen);
if (*dtype != DT_DIR)
continue;
}
@@ -1094,16 +1259,58 @@ enum pattern_match_result path_matches_pattern_list(
struct index_state *istate)
{
struct path_pattern *pattern;
- pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
- dtype, pl, istate);
- if (pattern) {
- if (pattern->flags & PATTERN_FLAG_NEGATIVE)
- return NOT_MATCHED;
- else
- return MATCHED;
+ struct strbuf parent_pathname = STRBUF_INIT;
+ int result = NOT_MATCHED;
+ const char *slash_pos;
+
+ if (!pl->use_cone_patterns) {
+ pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
+ dtype, pl, istate);
+ if (pattern) {
+ if (pattern->flags & PATTERN_FLAG_NEGATIVE)
+ return NOT_MATCHED;
+ else
+ return MATCHED;
+ }
+
+ return UNDECIDED;
+ }
+
+ if (pl->full_cone)
+ return MATCHED;
+
+ strbuf_addch(&parent_pathname, '/');
+ strbuf_add(&parent_pathname, pathname, pathlen);
+
+ if (hashmap_contains_path(&pl->recursive_hashmap,
+ &parent_pathname)) {
+ result = MATCHED_RECURSIVE;
+ goto done;
}
- return UNDECIDED;
+ slash_pos = strrchr(parent_pathname.buf, '/');
+
+ if (slash_pos == parent_pathname.buf) {
+ /* include every file in root */
+ result = MATCHED;
+ goto done;
+ }
+
+ strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf);
+
+ if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) {
+ result = MATCHED;
+ goto done;
+ }
+
+ if (hashmap_contains_parent(&pl->recursive_hashmap,
+ pathname,
+ &parent_pathname))
+ result = MATCHED_RECURSIVE;
+
+done:
+ strbuf_release(&parent_pathname);
+ return result;
}
static struct path_pattern *last_matching_pattern_from_lists(
@@ -1452,6 +1659,8 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
const char *dirname, int len, int baselen, int exclude,
const struct pathspec *pathspec)
{
+ int nested_repo = 0;
+
/* The "len-1" is to strip the final '/' */
switch (directory_exists_in_index(istate, dirname, len-1)) {
case index_directory:
@@ -1461,15 +1670,16 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
return path_none;
case index_nonexistent:
- if (dir->flags & DIR_SKIP_NESTED_GIT) {
- int nested_repo;
+ if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
+ !(dir->flags & DIR_NO_GITLINKS)) {
struct strbuf sb = STRBUF_INIT;
strbuf_addstr(&sb, dirname);
nested_repo = is_nonbare_repository_dir(&sb);
strbuf_release(&sb);
- if (nested_repo)
- return path_none;
}
+ if (nested_repo)
+ return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
+ (exclude ? path_excluded : path_untracked));
if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
break;
@@ -1497,13 +1707,6 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
return path_none;
}
- if (!(dir->flags & DIR_NO_GITLINKS)) {
- struct strbuf sb = STRBUF_INIT;
- strbuf_addstr(&sb, dirname);
- if (is_nonbare_repository_dir(&sb))
- return exclude ? path_excluded : path_untracked;
- strbuf_release(&sb);
- }
return path_recurse;
}
@@ -1639,10 +1842,9 @@ static int get_index_dtype(struct index_state *istate,
return DT_UNKNOWN;
}
-static int get_dtype(struct dirent *de, struct index_state *istate,
- const char *path, int len)
+static int resolve_dtype(int dtype, struct index_state *istate,
+ const char *path, int len)
{
- int dtype = de ? DTYPE(de) : DT_UNKNOWN;
struct stat st;
if (dtype != DT_UNKNOWN)
@@ -1667,14 +1869,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
struct strbuf *path,
int baselen,
const struct pathspec *pathspec,
- int dtype, struct dirent *de)
+ int dtype)
{
int exclude;
int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
enum path_treatment path_treatment;
- if (dtype == DT_UNKNOWN)
- dtype = get_dtype(de, istate, path->buf, path->len);
+ dtype = resolve_dtype(dtype, istate, path->buf, path->len);
/* Always exclude indexed files */
if (dtype != DT_DIR && has_path_in_index)
@@ -1782,21 +1983,18 @@ static enum path_treatment treat_path(struct dir_struct *dir,
int baselen,
const struct pathspec *pathspec)
{
- int dtype;
- struct dirent *de = cdir->de;
-
- if (!de)
+ if (!cdir->d_name)
return treat_path_fast(dir, untracked, cdir, istate, path,
baselen, pathspec);
- if (is_dot_or_dotdot(de->d_name) || !fspathcmp(de->d_name, ".git"))
+ if (is_dot_or_dotdot(cdir->d_name) || !fspathcmp(cdir->d_name, ".git"))
return path_none;
strbuf_setlen(path, baselen);
- strbuf_addstr(path, de->d_name);
+ strbuf_addstr(path, cdir->d_name);
if (simplify_away(path->buf, path->len, pathspec))
return path_none;
- dtype = DTYPE(de);
- return treat_one_path(dir, untracked, istate, path, baselen, pathspec, dtype, de);
+ return treat_one_path(dir, untracked, istate, path, baselen, pathspec,
+ cdir->d_type);
}
static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -1884,10 +2082,17 @@ static int open_cached_dir(struct cached_dir *cdir,
static int read_cached_dir(struct cached_dir *cdir)
{
+ struct dirent *de;
+
if (cdir->fdir) {
- cdir->de = readdir(cdir->fdir);
- if (!cdir->de)
+ de = readdir(cdir->fdir);
+ if (!de) {
+ cdir->d_name = NULL;
+ cdir->d_type = DT_UNKNOWN;
return -1;
+ }
+ cdir->d_name = de->d_name;
+ cdir->d_type = DTYPE(de);
return 0;
}
while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
@@ -1923,6 +2128,40 @@ static void close_cached_dir(struct cached_dir *cdir)
}
}
+/*
+ * File "path" into the result list of "dir" that corresponds to "state".
+ *
+ *  - path_excluded: goes through dir_add_name() when DIR_SHOW_IGNORED is
+ *    set; otherwise through dir_add_ignored() when DIR_SHOW_IGNORED_TOO
+ *    is set, or when DIR_COLLECT_IGNORED is set and the path satisfies
+ *    exclude_matches_pathspec().
+ *  - path_untracked: skipped when DIR_SHOW_IGNORED is set; otherwise
+ *    added with dir_add_name(), and the part of the path after "baselen"
+ *    is also passed to add_untracked() when cdir->fdir is set.
+ *  - any other state: nothing is recorded.
+ */
+static void add_path_to_appropriate_result_list(struct dir_struct *dir,
+	struct untracked_cache_dir *untracked,
+	struct cached_dir *cdir,
+	struct index_state *istate,
+	struct strbuf *path,
+	int baselen,
+	const struct pathspec *pathspec,
+	enum path_treatment state)
+{
+	if (state == path_excluded) {
+		if (dir->flags & DIR_SHOW_IGNORED) {
+			dir_add_name(dir, istate, path->buf, path->len);
+			return;
+		}
+		if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
+		    ((dir->flags & DIR_COLLECT_IGNORED) &&
+		     exclude_matches_pathspec(path->buf, path->len,
+					      pathspec)))
+			dir_add_ignored(dir, istate, path->buf, path->len);
+		return;
+	}
+
+	if (state != path_untracked)
+		return;
+
+	/* Untracked paths are not listed when only ignored ones are wanted. */
+	if (dir->flags & DIR_SHOW_IGNORED)
+		return;
+
+	dir_add_name(dir, istate, path->buf, path->len);
+	if (cdir->fdir)
+		add_untracked(untracked, path->buf + baselen);
+}
+
/*
* Read a directory tree. We currently ignore anything but
* directories, regular files and symlinks. That's because git
@@ -1947,6 +2186,15 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
struct untracked_cache_dir *untracked, int check_only,
int stop_at_first_file, const struct pathspec *pathspec)
{
+ /*
+ * WARNING WARNING WARNING:
+ *
+ * Any updates to the traversal logic here may need corresponding
+ * updates in treat_leading_path(). See the commit message for the
+ * commit adding this warning as well as the commit preceding it
+ * for details.
+ */
+
struct cached_dir cdir;
enum path_treatment state, subdir_state, dir_state = path_none;
struct strbuf path = STRBUF_INIT;
@@ -1970,7 +2218,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
/* recurse into subdir if instructed by treat_path */
if ((state == path_recurse) ||
((state == path_untracked) &&
- (get_dtype(cdir.de, istate, path.buf, path.len) == DT_DIR) &&
+ (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) &&
((dir->flags & DIR_SHOW_IGNORED_TOO) ||
(pathspec &&
do_match_pathspec(istate, pathspec, path.buf, path.len,
@@ -2026,29 +2274,9 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
continue;
}
- /* add the path to the appropriate result list */
- switch (state) {
- case path_excluded:
- if (dir->flags & DIR_SHOW_IGNORED)
- dir_add_name(dir, istate, path.buf, path.len);
- else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
- ((dir->flags & DIR_COLLECT_IGNORED) &&
- exclude_matches_pathspec(path.buf, path.len,
- pathspec)))
- dir_add_ignored(dir, istate, path.buf, path.len);
- break;
-
- case path_untracked:
- if (dir->flags & DIR_SHOW_IGNORED)
- break;
- dir_add_name(dir, istate, path.buf, path.len);
- if (cdir.fdir)
- add_untracked(untracked, path.buf + baselen);
- break;
-
- default:
- break;
- }
+ add_path_to_appropriate_result_list(dir, untracked, &cdir,
+ istate, &path, baselen,
+ pathspec, state);
}
close_cached_dir(&cdir);
out:
@@ -2078,41 +2306,95 @@ static int treat_leading_path(struct dir_struct *dir,
const char *path, int len,
const struct pathspec *pathspec)
{
+ /*
+ * WARNING WARNING WARNING:
+ *
+ * Any updates to the traversal logic here may need corresponding
+ * updates in read_directory_recursive(). See 777b420347 (dir:
+ * synchronize treat_leading_path() and read_directory_recursive(),
+ * 2019-12-19) and its parent commit for details.
+ */
+
struct strbuf sb = STRBUF_INIT;
- int baselen, rc = 0;
+ struct strbuf subdir = STRBUF_INIT;
+ int prevlen, baselen;
const char *cp;
- int old_flags = dir->flags;
+ struct cached_dir cdir;
+ enum path_treatment state = path_none;
+
+ /*
+ * For each directory component of path, we are going to check whether
+ * that path is relevant given the pathspec. For example, if path is
+ * foo/bar/baz/
+ * then we will ask treat_path() whether we should go into foo, then
+ * whether we should go into bar, then whether baz is relevant.
+ * Checking each is important because e.g. if path is
+ * .git/info/
+ * then we need to check .git to know we shouldn't traverse it.
+ * If the return from treat_path() is:
+ * * path_none, for any path, we return false.
+ * * path_recurse, for all path components, we return true
+ * * <anything else> for some intermediate component, we make sure
+ * to add that path to the relevant list but return false
+ * signifying that we shouldn't recurse into it.
+ */
while (len && path[len - 1] == '/')
len--;
if (!len)
return 1;
+
+ memset(&cdir, 0, sizeof(cdir));
+ cdir.d_type = DT_DIR;
baselen = 0;
- dir->flags &= ~DIR_SHOW_OTHER_DIRECTORIES;
+ prevlen = 0;
while (1) {
- cp = path + baselen + !!baselen;
+ prevlen = baselen + !!baselen;
+ cp = path + prevlen;
cp = memchr(cp, '/', path + len - cp);
if (!cp)
baselen = len;
else
baselen = cp - path;
- strbuf_setlen(&sb, 0);
+ strbuf_reset(&sb);
strbuf_add(&sb, path, baselen);
if (!is_directory(sb.buf))
break;
- if (simplify_away(sb.buf, sb.len, pathspec))
- break;
- if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec,
- DT_DIR, NULL) == path_none)
+ strbuf_reset(&sb);
+ strbuf_add(&sb, path, prevlen);
+ strbuf_reset(&subdir);
+ strbuf_add(&subdir, path+prevlen, baselen-prevlen);
+ cdir.d_name = subdir.buf;
+ state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
+ pathspec);
+ if (state == path_untracked &&
+ resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR &&
+ (dir->flags & DIR_SHOW_IGNORED_TOO ||
+ do_match_pathspec(istate, pathspec, sb.buf, sb.len,
+ baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
+ if (!match_pathspec(istate, pathspec, sb.buf, sb.len,
+ 0 /* prefix */, NULL,
+ 0 /* do NOT special case dirs */))
+ state = path_none;
+ add_path_to_appropriate_result_list(dir, NULL, &cdir,
+ istate,
+ &sb, baselen,
+ pathspec, state);
+ state = path_recurse;
+ }
+
+ if (state != path_recurse)
break; /* do not recurse into it */
- if (len <= baselen) {
- rc = 1;
+ if (len <= baselen)
break; /* finished checking */
- }
}
+ add_path_to_appropriate_result_list(dir, NULL, &cdir, istate,
+ &sb, baselen, pathspec,
+ state);
+
+ strbuf_release(&subdir);
strbuf_release(&sb);
- dir->flags = old_flags;
- return rc;
+ return state == path_recurse;
}
static const char *get_ident_string(void)
@@ -2487,7 +2769,7 @@ static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)
* wanted anyway
*/
continue;
- /* fall thru */
+ /* fall through */
} else if (S_ISDIR(st.st_mode)) {
if (!remove_dir_recurse(path, flag, &kept_down))
continue; /* happy */