From 7fe1ffdafa56b8453a47a40b866d029f24a56d76 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:15 +0000 Subject: dir: report number of visited directories and paths with trace2 Provide more statistics in trace2 output that include the number of directories and total paths visited by the directory traversal logic. Subsequent patches will take advantage of this to ensure we do not unnecessarily traverse into ignored directories. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'dir.h') diff --git a/dir.h b/dir.h index facfae4740..70c750e305 100644 --- a/dir.h +++ b/dir.h @@ -336,6 +336,10 @@ struct dir_struct { struct oid_stat ss_info_exclude; struct oid_stat ss_excludes_file; unsigned unmanaged_exclude_files; + + /* Stats about the traversal */ + unsigned visited_paths; + unsigned visited_directories; }; /*Count the number of slashes for string s*/ -- cgit v1.2.3 From b548f0f1568f6b01e55ca69c24d3cb19489f92aa Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:22 +0000 Subject: dir: introduce readdir_skip_dot_and_dotdot() helper Many places in the code were doing while ((d = readdir(dir)) != NULL) { if (is_dot_or_dotdot(d->d_name)) continue; ...process d... } Introduce a readdir_skip_dot_and_dotdot() helper to make that a one-liner: while ((d = readdir_skip_dot_and_dotdot(dir)) != NULL) { ...process d... } This helper particularly simplifies checks for empty directories. Also use this helper in read_cached_dir() so that our statistics are consistent across platforms. (In other words, read_cached_dir() should have been using is_dot_or_dotdot() and skipping such entries, but did not and left it to treat_path() to detect and mark such entries as path_none.) 
Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'dir.h') diff --git a/dir.h b/dir.h index 70c750e305..6b3fac0829 100644 --- a/dir.h +++ b/dir.h @@ -342,6 +342,8 @@ struct dir_struct { unsigned visited_directories; }; +struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp); + /*Count the number of slashes for string s*/ int count_slashes(const char *s); -- cgit v1.2.3 From ce93a4c6127abdf1ad9eacd537edd1c571a18e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 1 Jul 2021 12:51:27 +0200 Subject: dir.[ch]: replace dir_init() with DIR_INIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the dir_init() function and replace it with a DIR_INIT macro. In many cases in the codebase we need to initialize things with a function for good reasons, e.g. needing to call another function on initialization. The "dir_init()" function was not one such case, and could trivially be replaced with a more idiomatic macro initialization pattern. The only place where we made use of its use of memset() was in dir_clear() itself, which resets the contents of an existing struct pointer. Let's use the new "memcpy() a 'blank' struct on the stack" idiom to do that reset. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- dir.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'dir.h') diff --git a/dir.h b/dir.h index e3db9b9ec6..8d0ddd8f18 100644 --- a/dir.h +++ b/dir.h @@ -342,6 +342,8 @@ struct dir_struct { unsigned visited_directories; }; +#define DIR_INIT { 0 } + struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp); /*Count the number of slashes for string s*/ @@ -367,8 +369,6 @@ int match_pathspec(struct index_state *istate, int report_path_error(const char *ps_matched, const struct pathspec *pathspec); int within_depth(const char *name, int namelen, int depth, int max_depth); -void dir_init(struct dir_struct *dir); - int fill_directory(struct dir_struct *dir, struct index_state *istate, const struct pathspec *pathspec); -- cgit v1.2.3 From cf2dc1c238c6fd5f93c315a3045ccf95459701cd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 7 Jul 2021 23:10:15 +0000 Subject: speed up alt_odb_usable() with many alternates With many alternates, the duplicate check in alt_odb_usable() wastes many cycles doing repeated fspathcmp() on every existing alternate. Use a khash to speed up lookups by odb->path. Since the kh_put_* API uses the supplied key without duplicating it, we also take advantage of it to replace both xstrdup() and strbuf_release() in link_alt_odb_entry() with strbuf_detach() to avoid the allocation and copy. In a test repository with 50K alternates and each of those 50K alternates having one alternate each (for a total of 100K alternates), this speeds up lookup of a non-existent blob from over 16 minutes to roughly 2.7 seconds on my busy workstation. Note: all underlying git object directories were small and unpacked with only loose objects and no packs. Having to load packs increases times significantly. 
Signed-off-by: Eric Wong Signed-off-by: Junio C Hamano --- dir.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'dir.h') diff --git a/dir.h b/dir.h index e3db9b9ec6..2af7bcd7e5 100644 --- a/dir.h +++ b/dir.h @@ -489,7 +489,9 @@ int remove_dir_recursively(struct strbuf *path, int flag); int remove_path(const char *path); int fspathcmp(const char *a, const char *b); +int fspatheq(const char *a, const char *b); int fspathncmp(const char *a, const char *b, size_t count); +unsigned int fspathhash(const char *str); /* * The prefix part of pattern must not contains wildcards. -- cgit v1.2.3