diff options
Diffstat (limited to 'dir.c')
-rw-r--r-- | dir.c | 465 |
1 file changed, 280 insertions, 185 deletions
@@ -193,6 +193,10 @@ int fill_directory(struct dir_struct *dir, const char *prefix; size_t prefix_len; + unsigned exclusive_flags = DIR_SHOW_IGNORED | DIR_SHOW_IGNORED_TOO; + if ((dir->flags & exclusive_flags) == exclusive_flags) + BUG("DIR_SHOW_IGNORED and DIR_SHOW_IGNORED_TOO are exclusive"); + /* * Calculate common prefix for the pathspec, and * use that to optimize the directory walk @@ -364,7 +368,8 @@ static int match_pathspec_item(const struct index_state *istate, return MATCHED_FNMATCH; /* Perform checks to see if "name" is a leading string of the pathspec */ - if (flags & DO_MATCH_LEADING_PATHSPEC) { + if ( (flags & DO_MATCH_LEADING_PATHSPEC) && + !(flags & DO_MATCH_EXCLUDE)) { /* name is a literal prefix of the pathspec */ int offset = name[namelen-1] == '/' ? 1 : 0; if ((namelen < matchlen) && @@ -401,6 +406,10 @@ static int match_pathspec_item(const struct index_state *istate, } /* + * do_match_pathspec() is meant to ONLY be called by + * match_pathspec_with_flags(); calling it directly risks pathspecs + * like ':!unwanted_path' being ignored. + * * Given a name and a list of pathspecs, returns the nature of the * closest (i.e. most specific) match of the name to any of the * pathspecs. @@ -486,13 +495,12 @@ static int do_match_pathspec(const struct index_state *istate, return retval; } -int match_pathspec(const struct index_state *istate, - const struct pathspec *ps, - const char *name, int namelen, - int prefix, char *seen, int is_dir) +static int match_pathspec_with_flags(const struct index_state *istate, + const struct pathspec *ps, + const char *name, int namelen, + int prefix, char *seen, unsigned flags) { int positive, negative; - unsigned flags = is_dir ? DO_MATCH_DIRECTORY : 0; positive = do_match_pathspec(istate, ps, name, namelen, prefix, seen, flags); if (!(ps->magic & PATHSPEC_EXCLUDE) || !positive) @@ -503,6 +511,16 @@ int match_pathspec(const struct index_state *istate, return negative ? 
0 : positive; } +int match_pathspec(const struct index_state *istate, + const struct pathspec *ps, + const char *name, int namelen, + int prefix, char *seen, int is_dir) +{ + unsigned flags = is_dir ? DO_MATCH_DIRECTORY : 0; + return match_pathspec_with_flags(istate, ps, name, namelen, + prefix, seen, flags); +} + /** * Check if a submodule is a superset of the pathspec */ @@ -511,11 +529,11 @@ int submodule_path_match(const struct index_state *istate, const char *submodule_name, char *seen) { - int matched = do_match_pathspec(istate, ps, submodule_name, - strlen(submodule_name), - 0, seen, - DO_MATCH_DIRECTORY | - DO_MATCH_LEADING_PATHSPEC); + int matched = match_pathspec_with_flags(istate, ps, submodule_name, + strlen(submodule_name), + 0, seen, + DO_MATCH_DIRECTORY | + DO_MATCH_LEADING_PATHSPEC); return matched; } @@ -1727,36 +1745,61 @@ static enum exist_status directory_exists_in_index(struct index_state *istate, static enum path_treatment treat_directory(struct dir_struct *dir, struct index_state *istate, struct untracked_cache_dir *untracked, - const char *dirname, int len, int baselen, int exclude, + const char *dirname, int len, int baselen, int excluded, const struct pathspec *pathspec) { - int nested_repo = 0; - + /* + * WARNING: From this function, you can return path_recurse or you + * can call read_directory_recursive() (or neither), but + * you CAN'T DO BOTH. 
+ */ + enum path_treatment state; + int matches_how = 0; + int nested_repo = 0, check_only, stop_early; + int old_ignored_nr, old_untracked_nr; /* The "len-1" is to strip the final '/' */ - switch (directory_exists_in_index(istate, dirname, len-1)) { - case index_directory: - return path_recurse; + enum exist_status status = directory_exists_in_index(istate, dirname, len-1); - case index_gitdir: + if (status == index_directory) + return path_recurse; + if (status == index_gitdir) return path_none; + if (status != index_nonexistent) + BUG("Unhandled value for directory_exists_in_index: %d\n", status); - case index_nonexistent: - if ((dir->flags & DIR_SKIP_NESTED_GIT) || - !(dir->flags & DIR_NO_GITLINKS)) { - struct strbuf sb = STRBUF_INIT; - strbuf_addstr(&sb, dirname); - nested_repo = is_nonbare_repository_dir(&sb); - strbuf_release(&sb); - } - if (nested_repo) - return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none : - (exclude ? path_excluded : path_untracked)); + /* + * We don't want to descend into paths that don't match the necessary + * patterns. Clearly, if we don't have a pathspec, then we can't check + * for matching patterns. Also, if (excluded) then we know we matched + * the exclusion patterns so as an optimization we can skip checking + * for matching patterns. + */ + if (pathspec && !excluded) { + matches_how = match_pathspec_with_flags(istate, pathspec, + dirname, len, + 0 /* prefix */, + NULL /* seen */, + DO_MATCH_LEADING_PATHSPEC); + if (!matches_how) + return path_none; + } - if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) - break; - if (exclude && - (dir->flags & DIR_SHOW_IGNORED_TOO) && - (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) { + + if ((dir->flags & DIR_SKIP_NESTED_GIT) || + !(dir->flags & DIR_NO_GITLINKS)) { + struct strbuf sb = STRBUF_INIT; + strbuf_addstr(&sb, dirname); + nested_repo = is_nonbare_repository_dir(&sb); + strbuf_release(&sb); + } + if (nested_repo) + return ((dir->flags & DIR_SKIP_NESTED_GIT) ? 
path_none : + (excluded ? path_excluded : path_untracked)); + + if (!(dir->flags & DIR_SHOW_OTHER_DIRECTORIES)) { + if (excluded && + (dir->flags & DIR_SHOW_IGNORED_TOO) && + (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) { /* * This is an excluded directory and we are @@ -1783,18 +1826,135 @@ static enum path_treatment treat_directory(struct dir_struct *dir, /* This is the "show_other_directories" case */ - if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) - return exclude ? path_excluded : path_untracked; + /* + * If we have a pathspec which could match something _below_ this + * directory (e.g. when checking 'subdir/' having a pathspec like + * 'subdir/some/deep/path/file' or 'subdir/widget-*.c'), then we + * need to recurse. + */ + if (matches_how == MATCHED_RECURSIVELY_LEADING_PATHSPEC) + return path_recurse; + + /* + * Other than the path_recurse case immediately above, we only need + * to recurse into untracked/ignored directories if either of the + * following bits is set: + * - DIR_SHOW_IGNORED_TOO (because then we need to determine if + * there are ignored entries below) + * - DIR_HIDE_EMPTY_DIRECTORIES (because we have to determine if + * the directory is empty) + */ + if (!(dir->flags & (DIR_SHOW_IGNORED_TOO | DIR_HIDE_EMPTY_DIRECTORIES))) + return excluded ? path_excluded : path_untracked; + + /* + * ...and even if DIR_SHOW_IGNORED_TOO is set, we can still avoid + * recursing into ignored directories if the path is excluded and + * DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set. + */ + if (excluded && + (dir->flags & DIR_SHOW_IGNORED_TOO) && + (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) + return path_excluded; + + /* + * Even if we don't want to know all the paths under an untracked or + * ignored directory, we may still need to go into the directory to + * determine if it is empty (because with DIR_HIDE_EMPTY_DIRECTORIES, + * an empty directory should be path_none instead of path_excluded or + * path_untracked). 
+ */ + check_only = ((dir->flags & DIR_HIDE_EMPTY_DIRECTORIES) && + !(dir->flags & DIR_SHOW_IGNORED_TOO)); + + /* + * However, there's another optimization possible as a subset of + * check_only, based on the cases we have to consider: + * A) Directory matches no exclude patterns: + * * Directory is empty => path_none + * * Directory has an untracked file under it => path_untracked + * * Directory has only ignored files under it => path_excluded + * B) Directory matches an exclude pattern: + * * Directory is empty => path_none + * * Directory has an untracked file under it => path_excluded + * * Directory has only ignored files under it => path_excluded + * In case A, we can exit as soon as we've found an untracked + * file but otherwise have to walk all files. In case B, though, + * we can stop at the first file we find under the directory. + */ + stop_early = check_only && excluded; + /* + * If /every/ file within an untracked directory is ignored, then + * we want to treat the directory as ignored (for e.g. status + * --porcelain), without listing the individual ignored files + * underneath. To do so, we'll save the current ignored_nr, and + * pop all the ones added after it if it turns out the entire + * directory is ignored. Also, when DIR_SHOW_IGNORED_TOO and + * !DIR_KEEP_UNTRACKED_CONTENTS then we don't want to show + * untracked paths so will need to pop all those off the last + * after we traverse. + */ + old_ignored_nr = dir->ignored_nr; + old_untracked_nr = dir->nr; + + /* Actually recurse into dirname now, we'll fixup the state later. */ untracked = lookup_untracked(dir->untracked, untracked, dirname + baselen, len - baselen); + state = read_directory_recursive(dir, istate, dirname, len, untracked, + check_only, stop_early, pathspec); + + /* There are a variety of reasons we may need to fixup the state... */ + if (state == path_excluded) { + /* state == path_excluded implies all paths under + * dirname were ignored... + * + * if running e.g. 
`git status --porcelain --ignored=matching`, + * then we want to see the subpaths that are ignored. + * + * if running e.g. just `git status --porcelain`, then + * we just want the directory itself to be listed as ignored + * and not the individual paths underneath. + */ + int want_ignored_subpaths = + ((dir->flags & DIR_SHOW_IGNORED_TOO) && + (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)); + + if (want_ignored_subpaths) { + /* + * with --ignored=matching, we want the subpaths + * INSTEAD of the directory itself. + */ + state = path_none; + } else { + int i; + for (i = old_ignored_nr + 1; i<dir->ignored_nr; ++i) + FREE_AND_NULL(dir->ignored[i]); + dir->ignored_nr = old_ignored_nr; + } + } + + /* + * We may need to ignore some of the untracked paths we found while + * traversing subdirectories. + */ + if ((dir->flags & DIR_SHOW_IGNORED_TOO) && + !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) { + int i; + for (i = old_untracked_nr + 1; i<dir->nr; ++i) + FREE_AND_NULL(dir->entries[i]); + dir->nr = old_untracked_nr; + } /* - * If this is an excluded directory, then we only need to check if - * the directory contains any files. + * If there is nothing under the current directory and we are not + * hiding empty directories, then we need to report on the + * untracked or ignored status of the directory itself. */ - return read_directory_recursive(dir, istate, dirname, len, - untracked, 1, exclude, pathspec); + if (state == path_none && !(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) + state = excluded ? 
path_excluded : path_untracked; + + return state; } /* @@ -1934,85 +2094,6 @@ static int resolve_dtype(int dtype, struct index_state *istate, return dtype; } -static enum path_treatment treat_one_path(struct dir_struct *dir, - struct untracked_cache_dir *untracked, - struct index_state *istate, - struct strbuf *path, - int baselen, - const struct pathspec *pathspec, - int dtype) -{ - int exclude; - int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case); - enum path_treatment path_treatment; - - dtype = resolve_dtype(dtype, istate, path->buf, path->len); - - /* Always exclude indexed files */ - if (dtype != DT_DIR && has_path_in_index) - return path_none; - - /* - * When we are looking at a directory P in the working tree, - * there are three cases: - * - * (1) P exists in the index. Everything inside the directory P in - * the working tree needs to go when P is checked out from the - * index. - * - * (2) P does not exist in the index, but there is P/Q in the index. - * We know P will stay a directory when we check out the contents - * of the index, but we do not know yet if there is a directory - * P/Q in the working tree to be killed, so we need to recurse. - * - * (3) P does not exist in the index, and there is no P/Q in the index - * to require P to be a directory, either. Only in this case, we - * know that everything inside P will not be killed without - * recursing. - */ - if ((dir->flags & DIR_COLLECT_KILLED_ONLY) && - (dtype == DT_DIR) && - !has_path_in_index && - (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent)) - return path_none; - - exclude = is_excluded(dir, istate, path->buf, &dtype); - - /* - * Excluded? 
If we don't explicitly want to show - * ignored files, ignore it - */ - if (exclude && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO))) - return path_excluded; - - switch (dtype) { - default: - return path_none; - case DT_DIR: - strbuf_addch(path, '/'); - path_treatment = treat_directory(dir, istate, untracked, - path->buf, path->len, - baselen, exclude, pathspec); - /* - * If 1) we only want to return directories that - * match an exclude pattern and 2) this directory does - * not match an exclude pattern but all of its - * contents are excluded, then indicate that we should - * recurse into this directory (instead of marking the - * directory itself as an ignored path). - */ - if (!exclude && - path_treatment == path_excluded && - (dir->flags & DIR_SHOW_IGNORED_TOO) && - (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) - return path_recurse; - return path_treatment; - case DT_REG: - case DT_LNK: - return exclude ? path_excluded : path_untracked; - } -} - static enum path_treatment treat_path_fast(struct dir_struct *dir, struct untracked_cache_dir *untracked, struct cached_dir *cdir, @@ -2021,6 +2102,11 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir, int baselen, const struct pathspec *pathspec) { + /* + * WARNING: From this function, you can return path_recurse or you + * can call read_directory_recursive() (or neither), but + * you CAN'T DO BOTH. 
+ */ strbuf_setlen(path, baselen); if (!cdir->ucd) { strbuf_addstr(path, cdir->file); @@ -2054,6 +2140,8 @@ static enum path_treatment treat_path(struct dir_struct *dir, int baselen, const struct pathspec *pathspec) { + int has_path_in_index, dtype, excluded; + if (!cdir->d_name) return treat_path_fast(dir, untracked, cdir, istate, path, baselen, pathspec); @@ -2064,8 +2152,72 @@ static enum path_treatment treat_path(struct dir_struct *dir, if (simplify_away(path->buf, path->len, pathspec)) return path_none; - return treat_one_path(dir, untracked, istate, path, baselen, pathspec, - cdir->d_type); + dtype = resolve_dtype(cdir->d_type, istate, path->buf, path->len); + + /* Always exclude indexed files */ + has_path_in_index = !!index_file_exists(istate, path->buf, path->len, + ignore_case); + if (dtype != DT_DIR && has_path_in_index) + return path_none; + + /* + * When we are looking at a directory P in the working tree, + * there are three cases: + * + * (1) P exists in the index. Everything inside the directory P in + * the working tree needs to go when P is checked out from the + * index. + * + * (2) P does not exist in the index, but there is P/Q in the index. + * We know P will stay a directory when we check out the contents + * of the index, but we do not know yet if there is a directory + * P/Q in the working tree to be killed, so we need to recurse. + * + * (3) P does not exist in the index, and there is no P/Q in the index + * to require P to be a directory, either. Only in this case, we + * know that everything inside P will not be killed without + * recursing. + */ + if ((dir->flags & DIR_COLLECT_KILLED_ONLY) && + (dtype == DT_DIR) && + !has_path_in_index && + (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent)) + return path_none; + + excluded = is_excluded(dir, istate, path->buf, &dtype); + + /* + * Excluded? 
If we don't explicitly want to show + * ignored files, ignore it + */ + if (excluded && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO))) + return path_excluded; + + switch (dtype) { + default: + return path_none; + case DT_DIR: + /* + * WARNING: Do not ignore/amend the return value from + * treat_directory(), and especially do not change it to return + * path_recurse as that can cause exponential slowdown. + * Instead, modify treat_directory() to return the right value. + */ + strbuf_addch(path, '/'); + return treat_directory(dir, istate, untracked, + path->buf, path->len, + baselen, excluded, pathspec); + case DT_REG: + case DT_LNK: + if (pathspec && + !match_pathspec(istate, pathspec, path->buf, path->len, + 0 /* prefix */, NULL /* seen */, + 0 /* is_dir */)) + return path_none; + if (excluded) + return path_excluded; + return path_untracked; + } } static void add_untracked(struct untracked_cache_dir *dir, const char *name) @@ -2245,7 +2397,7 @@ static void add_path_to_appropriate_result_list(struct dir_struct *dir, * If 'stop_at_first_file' is specified, 'path_excluded' is returned * to signal that a file was found. This is the least significant value that * indicates that a file was encountered that does not depend on the order of - * whether an untracked or exluded path was encountered first. + * whether an untracked or excluded path was encountered first. * * Returns the most significant path_treatment value encountered in the scan. * If 'stop_at_first_file' is specified, `path_excluded` is the most @@ -2258,14 +2410,10 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, int stop_at_first_file, const struct pathspec *pathspec) { /* - * WARNING WARNING WARNING: - * - * Any updates to the traversal logic here may need corresponding - * updates in treat_leading_path(). See the commit message for the - * commit adding this warning as well as the commit preceding it - * for details. 
+ * WARNING: Do NOT recurse unless path_recurse is returned from + * treat_path(). Recursing on any other return value + * can result in exponential slowdown. */ - struct cached_dir cdir; enum path_treatment state, subdir_state, dir_state = path_none; struct strbuf path = STRBUF_INIT; @@ -2287,13 +2435,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, dir_state = state; /* recurse into subdir if instructed by treat_path */ - if ((state == path_recurse) || - ((state == path_untracked) && - (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) && - ((dir->flags & DIR_SHOW_IGNORED_TOO) || - (pathspec && - do_match_pathspec(istate, pathspec, path.buf, path.len, - baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)))) { + if (state == path_recurse) { struct untracked_cache_dir *ud; ud = lookup_untracked(dir->untracked, untracked, path.buf + baselen, @@ -2341,7 +2483,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, add_untracked(untracked, path.buf + baselen); break; } - /* skip the dir_add_* part */ + /* skip the add_path_to_appropriate_result_list() */ continue; } @@ -2377,15 +2519,6 @@ static int treat_leading_path(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec) { - /* - * WARNING WARNING WARNING: - * - * Any updates to the traversal logic here may need corresponding - * updates in read_directory_recursive(). See 777b420347 (dir: - * synchronize treat_leading_path() and read_directory_recursive(), - * 2019-12-19) and its parent commit for details. 
- */ - struct strbuf sb = STRBUF_INIT; struct strbuf subdir = STRBUF_INIT; int prevlen, baselen; @@ -2436,23 +2569,7 @@ static int treat_leading_path(struct dir_struct *dir, strbuf_reset(&subdir); strbuf_add(&subdir, path+prevlen, baselen-prevlen); cdir.d_name = subdir.buf; - state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, - pathspec); - if (state == path_untracked && - resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR && - (dir->flags & DIR_SHOW_IGNORED_TOO || - do_match_pathspec(istate, pathspec, sb.buf, sb.len, - baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) { - if (!match_pathspec(istate, pathspec, sb.buf, sb.len, - 0 /* prefix */, NULL, - 0 /* do NOT special case dirs */)) - state = path_none; - add_path_to_appropriate_result_list(dir, NULL, &cdir, - istate, - &sb, baselen, - pathspec, state); - state = path_recurse; - } + state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, pathspec); if (state != path_recurse) break; /* do not recurse into it */ @@ -2652,28 +2769,6 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, QSORT(dir->entries, dir->nr, cmp_dir_entry); QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry); - /* - * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will - * also pick up untracked contents of untracked dirs; by default - * we discard these, but given DIR_KEEP_UNTRACKED_CONTENTS we do not. - */ - if ((dir->flags & DIR_SHOW_IGNORED_TOO) && - !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) { - int i, j; - - /* remove from dir->entries untracked contents of untracked dirs */ - for (i = j = 0; j < dir->nr; j++) { - if (i && - check_dir_entry_contains(dir->entries[i - 1], dir->entries[j])) { - FREE_AND_NULL(dir->entries[j]); - } else { - dir->entries[i++] = dir->entries[j]; - } - } - - dir->nr = i; - } - trace_performance_leave("read directory %.*s", len, path); if (dir->untracked) { static int force_untracked_cache = -1; |