From 900b59155b02ea7c45fe9f081d293806482b5529 Mon Sep 17 00:00:00 2001 From: Seednode Date: Tue, 3 Oct 2023 08:26:55 -0500 Subject: [PATCH] Replace filepath.WalkDir() with recursive function based on os.ReadDir(), to skip unnecessary Stat() calls --- README.md | 60 ++++++++++++++++++------------------- cmd/errors.go | 1 - cmd/files.go | 83 +++++++++++++++++++++++++-------------------------- cmd/root.go | 8 +---- 4 files changed, 70 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index e1d3899..7ba5b9f 100644 --- a/README.md +++ b/README.md @@ -106,37 +106,35 @@ Usage: roulette [path]... [flags] Flags: - -a, --all enable all supported file types - --audio enable support for audio files - -b, --bind string address to bind to (default "0.0.0.0") - --case-sensitive use case-sensitive matching for filters - --code enable support for source code files - --code-theme string theme for source code syntax highlighting (default "solarized-dark256") - --exit-on-error shut down webserver on error, instead of just printing the error - -f, --filter enable filtering - --flash enable support for shockwave flash files (via ruffle.rs) - --handlers display registered handlers (for debugging) - -h, --help help for roulette - --images enable support for image files - -c, --index generate index of supported file paths at startup - --index-file string path to optional persistent index file - -i, --info expose informational endpoints - --max-directory-scans int number of directories to scan at once (default 32) - --max-file-count int skip directories with file counts above this value (default 2147483647) - --max-file-scans int number of files to scan at once (default 256) - --min-file-count int skip directories with file counts below this value (default 1) - --page-length int pagination length for info pages - -p, --port int port to listen on (default 8080) - --prefix string root path for http handlers (for reverse proxying) (default "/") - --profile register net/http/pprof handlers - -r, --recursive recurse into subdirectories - --refresh enable automatic page refresh via query parameter - --russian remove selected images after serving - -s, --sort enable sorting - --text enable support for text files - -v, --verbose log accessed files and other information to stdout - -V, --version display version and exit - --video enable support for video files + -a, --all enable all supported file types + --audio enable support for audio files + -b, --bind string address to bind to (default "0.0.0.0") + --case-sensitive use case-sensitive matching for filters + --code enable support for source code files + --code-theme string theme for source code syntax highlighting (default "solarized-dark256") + --exit-on-error shut down webserver on error, instead of just printing the error + -f, --filter enable filtering + --flash enable support for shockwave flash files (via ruffle.rs) + --handlers display registered handlers (for debugging) + -h, --help help for roulette + --images enable support for image files + --index generate index of supported file paths at startup + --index-file string path to optional persistent index file + -i, --info expose informational endpoints + --max-file-count int skip directories with file counts above this value (default 2147483647) + --min-file-count int skip directories with file counts below this value (default 1) + --page-length int pagination length for info pages + -p, --port int port to listen on (default 8080) + --prefix string root path for http handlers (for reverse proxying) (default "/") + --profile register net/http/pprof handlers + -r, --recursive recurse into subdirectories + --refresh enable automatic page refresh via query parameter + --russian remove selected images after serving + -s, --sort enable sorting + --text enable support for text files + -v, --verbose log accessed files and other information to stdout + -V, --version display version and exit + --video enable support for video files ``` ## Building the Docker container diff --git a/cmd/errors.go b/cmd/errors.go index 26d4514..a5e4803 100644 --- a/cmd/errors.go +++ b/cmd/errors.go @@ -19,7 +19,6 @@ var ( ErrInvalidFileCountRange = errors.New("maximum file count limit must be greater than or equal to minimum file count limit") ErrInvalidFileCountValue = errors.New("file count limits must be positive integers no greater than 2147483647") ErrInvalidPort = errors.New("listen port must be an integer between 1 and 65535 inclusive") - ErrInvalidScanCount = errors.New("maximum scan count must be a positive integer no greater than 2147483647") ErrNoMediaFound = errors.New("no supported media formats found which match all criteria") ) diff --git a/cmd/files.go b/cmd/files.go index f55cb9e..b653cb3 100644 --- a/cmd/files.go +++ b/cmd/files.go @@ -226,15 +226,20 @@ func hasSupportedFiles(path string, formats *types.Types) (bool, error) { } } -func pathCount(path string) (int, int, error) { - var directories = 0 - var files = 0 +func walkPath(path string, fileChannel chan<- string, stats *scanStatsChannels, formats *types.Types) error { + var wg sync.WaitGroup + + errorChannel := make(chan error) + done := make(chan bool, 1) nodes, err := os.ReadDir(path) if err != nil { - return 0, 0, err + return err } + var directories = 0 + var files = 0 + for _, node := range nodes { if node.IsDir() { directories++ @@ -243,30 +248,43 @@ func pathCount(path string) (int, int, error) { } } - return files, directories, nil -} + var skipFiles = false -func walkPath(path string, fileChannel chan<- string, fileScans chan int, stats *scanStatsChannels, formats *types.Types) error { - var wg sync.WaitGroup + if files < MinFileCount || files > MaxFileCount { + stats.filesSkipped <- files + stats.directoriesSkipped <- 1 - errorChannel := make(chan error) - done := make(chan bool, 1) + skipFiles = true + } else { + stats.directoriesMatched <- 1 + } + + for _, node := range nodes { + fullPath := filepath.Join(path, node.Name()) - filepath.WalkDir(path, func(p string, info os.DirEntry, err error) error { switch { - case !Recursive && info.IsDir() && p != path: - return filepath.SkipDir - case !info.IsDir(): + case node.IsDir() && Recursive: wg.Add(1) - fileScans <- 1 go func() { defer func() { wg.Done() - <-fileScans }() + err = walkPath(fullPath, fileChannel, stats, formats) + if err != nil { + errorChannel <- err - path, err := normalizePath(p) + return + } + }() + case !node.IsDir() && !skipFiles: + wg.Add(1) + + go func() { + defer func() { + wg.Done() + }() + path, err := normalizePath(fullPath) if err != nil { errorChannel <- err @@ -279,29 +297,12 @@ func walkPath(path string, fileChannel chan<- string, fileScans chan int, stats return } - fileChannel <- path + fileChannel <- fullPath stats.filesMatched <- 1 }() - case info.IsDir(): - files, directories, err := pathCount(p) - if err != nil { - errorChannel <- err - } - - if files > 0 && (files < MinFileCount) || (files > MaxFileCount) { - // This count will not otherwise include the parent directory itself, so increment by one - stats.directoriesSkipped <- directories + 1 - stats.filesSkipped <- files - - return filepath.SkipDir - } - - stats.directoriesMatched <- 1 } - - return nil - }) + } go func() { wg.Wait() @@ -311,8 +312,8 @@ func walkPath(path string, fileChannel chan<- string, fileScans chan int, stats Poll: for { select { - case e := <-errorChannel: - return e + case err := <-errorChannel: + return err case <-done: break Poll } @@ -326,8 +327,6 @@ func scanPaths(paths []string, sort string, index *fileIndex, formats *types.Typ fileChannel := make(chan string) errorChannel := make(chan error) - directoryScans := make(chan int, MaxDirScans) - fileScans := make(chan int, MaxFileScans) done := make(chan bool, 1) stats := &scanStats{ @@ -350,15 +349,13 @@ func scanPaths(paths []string, sort string, index *fileIndex, formats *types.Typ for i := 0; i < len(paths); i++ { wg.Add(1) - directoryScans <- 1 go func(i int) { defer func() { wg.Done() - <-directoryScans }() + err := walkPath(paths[i], fileChannel, statsChannels, formats) - err := walkPath(paths[i], fileChannel, fileScans, statsChannels, formats) if err != nil { errorChannel <- err diff --git a/cmd/root.go b/cmd/root.go index 75879b8..a4f499a 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -12,7 +12,7 @@ import ( ) const ( - ReleaseVersion string = "2.1.1" + ReleaseVersion string = "2.2.0" ) var ( @@ -30,8 +30,6 @@ var ( Index bool IndexFile string Info bool - MaxDirScans int - MaxFileScans int MaxFileCount int MinFileCount int PageLength int @@ -53,8 +51,6 @@ var ( Args: cobra.MinimumNArgs(1), PreRunE: func(cmd *cobra.Command, args []string) error { switch { - case MaxDirScans < 1 || MaxFileScans < 1 || MaxDirScans > math.MaxInt32 || MaxFileScans > math.MaxInt32: - return ErrInvalidScanCount case MaxFileCount < 1 || MinFileCount < 1 || MaxFileCount > math.MaxInt32 || MinFileCount > math.MaxInt32: return ErrInvalidFileCountValue case MinFileCount > MaxFileCount: @@ -98,8 +94,6 @@ func init() { rootCmd.Flags().BoolVar(&Index, "index", false, "generate index of supported file paths at startup") rootCmd.Flags().StringVar(&IndexFile, "index-file", "", "path to optional persistent index file") rootCmd.Flags().BoolVarP(&Info, "info", "i", false, "expose informational endpoints") - rootCmd.Flags().IntVar(&MaxDirScans, "max-directory-scans", 32, "number of directories to scan at once") - rootCmd.Flags().IntVar(&MaxFileScans, "max-file-scans", 256, "number of files to scan at once") rootCmd.Flags().IntVar(&MaxFileCount, "max-file-count", math.MaxInt32, "skip directories with file counts above this value") rootCmd.Flags().IntVar(&MinFileCount, "min-file-count", 1, "skip directories with file counts below this value") rootCmd.Flags().IntVar(&PageLength, "page-length", 0, "pagination length for info pages")