diff options
Diffstat (limited to 'vendor/golang.org/x/tools/internal/gopathwalk/walk.go')
-rw-r--r-- | vendor/golang.org/x/tools/internal/gopathwalk/walk.go | 308 |
1 files changed, 157 insertions, 151 deletions
diff --git a/vendor/golang.org/x/tools/internal/gopathwalk/walk.go b/vendor/golang.org/x/tools/internal/gopathwalk/walk.go index 52f74e643..836151551 100644 --- a/vendor/golang.org/x/tools/internal/gopathwalk/walk.go +++ b/vendor/golang.org/x/tools/internal/gopathwalk/walk.go @@ -9,11 +9,13 @@ package gopathwalk import ( "bufio" "bytes" + "io" "io/fs" - "log" "os" "path/filepath" + "runtime" "strings" + "sync" "time" ) @@ -21,8 +23,13 @@ import ( type Options struct { // If Logf is non-nil, debug logging is enabled through this function. Logf func(format string, args ...interface{}) + // Search module caches. Also disables legacy goimports ignore rules. ModulesEnabled bool + + // Maximum number of concurrent calls to user-provided callbacks, + // or 0 for GOMAXPROCS. + Concurrency int } // RootType indicates the type of a Root. @@ -43,19 +50,28 @@ type Root struct { Type RootType } -// Walk walks Go source directories ($GOROOT, $GOPATH, etc) to find packages. +// Walk concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to find packages. +// // For each package found, add will be called with the absolute // paths of the containing source directory and the package directory. +// +// Unlike filepath.WalkDir, Walk follows symbolic links +// (while guarding against cycles). func Walk(roots []Root, add func(root Root, dir string), opts Options) { WalkSkip(roots, add, func(Root, string) bool { return false }, opts) } -// WalkSkip walks Go source directories ($GOROOT, $GOPATH, etc) to find packages. +// WalkSkip concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to +// find packages. +// // For each package found, add will be called with the absolute // paths of the containing source directory and the package directory. // For each directory that will be scanned, skip will be called // with the absolute paths of the containing source directory and the directory. // If skip returns false on a directory it will be processed. +// +// Unlike filepath.WalkDir, WalkSkip follows symbolic links +// (while guarding against cycles). func WalkSkip(roots []Root, add func(root Root, dir string), skip func(root Root, dir string) bool, opts Options) { for _, root := range roots { walkDir(root, add, skip, opts) @@ -64,45 +80,51 @@ func WalkSkip(roots []Root, add func(root Root, dir string), skip func(root Root // walkDir creates a walker and starts fastwalk with this walker. func walkDir(root Root, add func(Root, string), skip func(root Root, dir string) bool, opts Options) { + if opts.Logf == nil { + opts.Logf = func(format string, args ...interface{}) {} + } if _, err := os.Stat(root.Path); os.IsNotExist(err) { - if opts.Logf != nil { - opts.Logf("skipping nonexistent directory: %v", root.Path) - } + opts.Logf("skipping nonexistent directory: %v", root.Path) return } start := time.Now() - if opts.Logf != nil { - opts.Logf("scanning %s", root.Path) + opts.Logf("scanning %s", root.Path) + + concurrency := opts.Concurrency + if concurrency == 0 { + // The walk be either CPU-bound or I/O-bound, depending on what the + // caller-supplied add function does and the details of the user's platform + // and machine. Rather than trying to fine-tune the concurrency level for a + // specific environment, we default to GOMAXPROCS: it is likely to be a good + // choice for a CPU-bound add function, and if it is instead I/O-bound, then + // dealing with I/O saturation is arguably the job of the kernel and/or + // runtime. (Oversaturating I/O seems unlikely to harm performance as badly + // as failing to saturate would.) + concurrency = runtime.GOMAXPROCS(0) } - w := &walker{ - root: root, - add: add, - skip: skip, - opts: opts, - added: make(map[string]bool), + root: root, + add: add, + skip: skip, + opts: opts, + sem: make(chan struct{}, concurrency), } w.init() - // Add a trailing path separator to cause filepath.WalkDir to traverse symlinks. + w.sem <- struct{}{} path := root.Path - if len(path) == 0 { - path = "." + string(filepath.Separator) - } else if !os.IsPathSeparator(path[len(path)-1]) { - path = path + string(filepath.Separator) + if path == "" { + path = "." } - - if err := filepath.WalkDir(path, w.walk); err != nil { - logf := opts.Logf - if logf == nil { - logf = log.Printf - } - logf("scanning directory %v: %v", root.Path, err) + if fi, err := os.Lstat(path); err == nil { + w.walk(path, nil, fs.FileInfoToDirEntry(fi)) + } else { + w.opts.Logf("scanning directory %v: %v", root.Path, err) } + <-w.sem + w.walking.Wait() - if opts.Logf != nil { - opts.Logf("scanned %s in %v", root.Path, time.Since(start)) - } + opts.Logf("scanned %s in %v", root.Path, time.Since(start)) } // walker is the callback for fastwalk.Walk. @@ -112,10 +134,18 @@ type walker struct { skip func(Root, string) bool // The callback that will be invoked for every dir. dir is skipped if it returns true. opts Options // Options passed to Walk by the user. - pathSymlinks []os.FileInfo - ignoredDirs []string + walking sync.WaitGroup + sem chan struct{} // Channel of semaphore tokens; send to acquire, receive to release. + ignoredDirs []string - added map[string]bool + added sync.Map // map[string]bool +} + +// A symlinkList is a linked list of os.FileInfos for parent directories +// reached via symlinks. +type symlinkList struct { + info os.FileInfo + prev *symlinkList } // init initializes the walker based on its Options @@ -132,9 +162,7 @@ func (w *walker) init() { for _, p := range ignoredPaths { full := filepath.Join(w.root.Path, p) w.ignoredDirs = append(w.ignoredDirs, full) - if w.opts.Logf != nil { - w.opts.Logf("Directory added to ignore list: %s", full) - } + w.opts.Logf("Directory added to ignore list: %s", full) } } @@ -144,12 +172,10 @@ func (w *walker) init() { func (w *walker) getIgnoredDirs(path string) []string { file := filepath.Join(path, ".goimportsignore") slurp, err := os.ReadFile(file) - if w.opts.Logf != nil { - if err != nil { - w.opts.Logf("%v", err) - } else { - w.opts.Logf("Read %s", file) - } + if err != nil { + w.opts.Logf("%v", err) + } else { + w.opts.Logf("Read %s", file) } if err != nil { return nil @@ -183,149 +209,129 @@ func (w *walker) shouldSkipDir(dir string) bool { // walk walks through the given path. // -// Errors are logged if w.opts.Logf is non-nil, but otherwise ignored: -// walk returns only nil or fs.SkipDir. -func (w *walker) walk(path string, d fs.DirEntry, err error) error { - if err != nil { - // We have no way to report errors back through Walk or WalkSkip, - // so just log and ignore them. - if w.opts.Logf != nil { +// Errors are logged if w.opts.Logf is non-nil, but otherwise ignored. +func (w *walker) walk(path string, pathSymlinks *symlinkList, d fs.DirEntry) { + if d.Type()&os.ModeSymlink != 0 { + // Walk the symlink's target rather than the symlink itself. + // + // (Note that os.Stat, unlike the lower-lever os.Readlink, + // follows arbitrarily many layers of symlinks, so it will eventually + // reach either a non-symlink or a nonexistent target.) + // + // TODO(bcmills): 'go list all' itself ignores symlinks within GOROOT/src + // and GOPATH/src. Do we really need to traverse them here? If so, why? + + fi, err := os.Stat(path) + if err != nil { w.opts.Logf("%v", err) + return + } + + // Avoid walking symlink cycles: if we have already followed a symlink to + // this directory as a parent of itself, don't follow it again. + // + // This doesn't catch the first time through a cycle, but it also minimizes + // the number of extra stat calls we make if we *don't* encounter a cycle. + // Since we don't actually expect to encounter symlink cycles in practice, + // this seems like the right tradeoff. + for parent := pathSymlinks; parent != nil; parent = parent.prev { + if os.SameFile(fi, parent.info) { + return + } } - if d == nil { - // Nothing more to do: the error prevents us from knowing - // what path even represents. - return nil + + pathSymlinks = &symlinkList{ + info: fi, + prev: pathSymlinks, } + d = fs.FileInfoToDirEntry(fi) } if d.Type().IsRegular() { if !strings.HasSuffix(path, ".go") { - return nil + return } dir := filepath.Dir(path) if dir == w.root.Path && (w.root.Type == RootGOROOT || w.root.Type == RootGOPATH) { // Doesn't make sense to have regular files // directly in your $GOPATH/src or $GOROOT/src. - return nil + // + // TODO(bcmills): there are many levels of directory within + // RootModuleCache where this also wouldn't make sense, + // Can we generalize this to any directory without a corresponding + // import path? + return } - if !w.added[dir] { + if _, dup := w.added.LoadOrStore(dir, true); !dup { w.add(w.root, dir) - w.added[dir] = true } - return nil } - if d.IsDir() { - base := filepath.Base(path) - if base == "" || base[0] == '.' || base[0] == '_' || - base == "testdata" || - (w.root.Type == RootGOROOT && w.opts.ModulesEnabled && base == "vendor") || - (!w.opts.ModulesEnabled && base == "node_modules") { - return fs.SkipDir - } - if w.shouldSkipDir(path) { - return fs.SkipDir - } - return nil + if !d.IsDir() { + return } - if d.Type()&os.ModeSymlink != 0 { - // TODO(bcmills): 'go list all' itself ignores symlinks within GOROOT/src - // and GOPATH/src. Do we really need to traverse them here? If so, why? - - fi, err := os.Stat(path) - if err != nil || !fi.IsDir() { - // Not a directory. Just walk the file (or broken link) and be done. - return w.walk(path, fs.FileInfoToDirEntry(fi), err) - } - - // Avoid walking symlink cycles: if we have already followed a symlink to - // this directory as a parent of itself, don't follow it again. - // - // This doesn't catch the first time through a cycle, but it also minimizes - // the number of extra stat calls we make if we *don't* encounter a cycle. - // Since we don't actually expect to encounter symlink cycles in practice, - // this seems like the right tradeoff. - for _, parent := range w.pathSymlinks { - if os.SameFile(fi, parent) { - return nil - } - } + base := filepath.Base(path) + if base == "" || base[0] == '.' || base[0] == '_' || + base == "testdata" || + (w.root.Type == RootGOROOT && w.opts.ModulesEnabled && base == "vendor") || + (!w.opts.ModulesEnabled && base == "node_modules") || + w.shouldSkipDir(path) { + return + } - w.pathSymlinks = append(w.pathSymlinks, fi) - defer func() { - w.pathSymlinks = w.pathSymlinks[:len(w.pathSymlinks)-1] - }() + // Read the directory and walk its entries. - // On some platforms the OS (or the Go os package) sometimes fails to - // resolve directory symlinks before a trailing slash - // (even though POSIX requires it to do so). - // - // On macOS that failure may be caused by a known libc/kernel bug; - // see https://go.dev/issue/59586. - // - // On Windows before Go 1.21, it may be caused by a bug in - // os.Lstat (fixed in https://go.dev/cl/463177). - // - // Since we need to handle this explicitly on broken platforms anyway, - // it is simplest to just always do that and not rely on POSIX pathname - // resolution to walk the directory (such as by calling WalkDir with - // a trailing slash appended to the path). + f, err := os.Open(path) + if err != nil { + w.opts.Logf("%v", err) + return + } + defer f.Close() + + for { + // We impose an arbitrary limit on the number of ReadDir results per + // directory to limit the amount of memory consumed for stale or upcoming + // directory entries. The limit trades off CPU (number of syscalls to read + // the whole directory) against RAM (reachable directory entries other than + // the one currently being processed). // - // Instead, we make a sequence of walk calls — directly and through - // recursive calls to filepath.WalkDir — simulating what WalkDir would do - // if the symlink were a regular directory. - - // First we call walk on the path as a directory - // (instead of a symlink). - err = w.walk(path, fs.FileInfoToDirEntry(fi), nil) - if err == fs.SkipDir { - return nil - } else if err != nil { - // This should be impossible, but handle it anyway in case - // walk is changed to return other errors. - return err - } - - // Now read the directory and walk its entries. - ents, err := os.ReadDir(path) + // Since we process the directories recursively, we will end up maintaining + // a slice of entries for each level of the directory tree. + // (Compare https://go.dev/issue/36197.) + ents, err := f.ReadDir(1024) if err != nil { - // Report the ReadDir error, as filepath.WalkDir would do. - err = w.walk(path, fs.FileInfoToDirEntry(fi), err) - if err == fs.SkipDir { - return nil - } else if err != nil { - return err // Again, should be impossible. + if err != io.EOF { + w.opts.Logf("%v", err) } - // Fall through and iterate over whatever entries we did manage to get. + break } for _, d := range ents { nextPath := filepath.Join(path, d.Name()) if d.IsDir() { - // We want to walk the whole directory tree rooted at nextPath, - // not just the single entry for the directory. - err := filepath.WalkDir(nextPath, w.walk) - if err != nil && w.opts.Logf != nil { - w.opts.Logf("%v", err) - } - } else { - err := w.walk(nextPath, d, nil) - if err == fs.SkipDir { - // Skip the rest of the entries in the parent directory of nextPath - // (that is, path itself). - break - } else if err != nil { - return err // Again, should be impossible. + select { + case w.sem <- struct{}{}: + // Got a new semaphore token, so we can traverse the directory concurrently. + d := d + w.walking.Add(1) + go func() { + defer func() { + <-w.sem + w.walking.Done() + }() + w.walk(nextPath, pathSymlinks, d) + }() + continue + + default: + // No tokens available, so traverse serially. } } + + w.walk(nextPath, pathSymlinks, d) } - return nil } - - // Not a file, regular directory, or symlink; skip. - return nil } |