Replace filepath.WalkDir() with a recursive function based on os.ReadDir() to skip unnecessary Stat() calls

This commit is contained in:
Seednode 2023-10-03 08:26:55 -05:00
parent 64133afe33
commit 900b59155b
4 changed files with 70 additions and 82 deletions

View File

@@ -118,12 +118,10 @@ Flags:
--handlers display registered handlers (for debugging) --handlers display registered handlers (for debugging)
-h, --help help for roulette -h, --help help for roulette
--images enable support for image files --images enable support for image files
-c, --index generate index of supported file paths at startup --index generate index of supported file paths at startup
--index-file string path to optional persistent index file --index-file string path to optional persistent index file
-i, --info expose informational endpoints -i, --info expose informational endpoints
--max-directory-scans int number of directories to scan at once (default 32)
--max-file-count int skip directories with file counts above this value (default 2147483647) --max-file-count int skip directories with file counts above this value (default 2147483647)
--max-file-scans int number of files to scan at once (default 256)
--min-file-count int skip directories with file counts below this value (default 1) --min-file-count int skip directories with file counts below this value (default 1)
--page-length int pagination length for info pages --page-length int pagination length for info pages
-p, --port int port to listen on (default 8080) -p, --port int port to listen on (default 8080)

View File

@@ -19,7 +19,6 @@ var (
ErrInvalidFileCountRange = errors.New("maximum file count limit must be greater than or equal to minimum file count limit") ErrInvalidFileCountRange = errors.New("maximum file count limit must be greater than or equal to minimum file count limit")
ErrInvalidFileCountValue = errors.New("file count limits must be positive integers no greater than 2147483647") ErrInvalidFileCountValue = errors.New("file count limits must be positive integers no greater than 2147483647")
ErrInvalidPort = errors.New("listen port must be an integer between 1 and 65535 inclusive") ErrInvalidPort = errors.New("listen port must be an integer between 1 and 65535 inclusive")
ErrInvalidScanCount = errors.New("maximum scan count must be a positive integer no greater than 2147483647")
ErrNoMediaFound = errors.New("no supported media formats found which match all criteria") ErrNoMediaFound = errors.New("no supported media formats found which match all criteria")
) )

View File

@@ -226,15 +226,20 @@ func hasSupportedFiles(path string, formats *types.Types) (bool, error) {
} }
} }
func pathCount(path string) (int, int, error) { func walkPath(path string, fileChannel chan<- string, stats *scanStatsChannels, formats *types.Types) error {
var directories = 0 var wg sync.WaitGroup
var files = 0
errorChannel := make(chan error)
done := make(chan bool, 1)
nodes, err := os.ReadDir(path) nodes, err := os.ReadDir(path)
if err != nil { if err != nil {
return 0, 0, err return err
} }
var directories = 0
var files = 0
for _, node := range nodes { for _, node := range nodes {
if node.IsDir() { if node.IsDir() {
directories++ directories++
@@ -243,30 +248,43 @@ func pathCount(path string) (int, int, error) {
} }
} }
return files, directories, nil var skipFiles = false
if files < MinFileCount || files > MaxFileCount {
stats.filesSkipped <- files
stats.directoriesSkipped <- 1
skipFiles = true
} else {
stats.directoriesMatched <- 1
} }
func walkPath(path string, fileChannel chan<- string, fileScans chan int, stats *scanStatsChannels, formats *types.Types) error { for _, node := range nodes {
var wg sync.WaitGroup fullPath := filepath.Join(path, node.Name())
errorChannel := make(chan error)
done := make(chan bool, 1)
filepath.WalkDir(path, func(p string, info os.DirEntry, err error) error {
switch { switch {
case !Recursive && info.IsDir() && p != path: case node.IsDir() && Recursive:
return filepath.SkipDir
case !info.IsDir():
wg.Add(1) wg.Add(1)
fileScans <- 1
go func() { go func() {
defer func() { defer func() {
wg.Done() wg.Done()
<-fileScans
}() }()
err = walkPath(fullPath, fileChannel, stats, formats)
if err != nil {
errorChannel <- err
path, err := normalizePath(p) return
}
}()
case !node.IsDir() && !skipFiles:
wg.Add(1)
go func() {
defer func() {
wg.Done()
}()
path, err := normalizePath(fullPath)
if err != nil { if err != nil {
errorChannel <- err errorChannel <- err
@@ -279,30 +297,13 @@ func walkPath(path string, fileChannel chan<- string, fileScans chan int, stats
return return
} }
fileChannel <- path fileChannel <- fullPath
stats.filesMatched <- 1 stats.filesMatched <- 1
}() }()
case info.IsDir():
files, directories, err := pathCount(p)
if err != nil {
errorChannel <- err
} }
if files > 0 && (files < MinFileCount) || (files > MaxFileCount) {
// This count will not otherwise include the parent directory itself, so increment by one
stats.directoriesSkipped <- directories + 1
stats.filesSkipped <- files
return filepath.SkipDir
} }
stats.directoriesMatched <- 1
}
return nil
})
go func() { go func() {
wg.Wait() wg.Wait()
done <- true done <- true
@@ -311,8 +312,8 @@ func walkPath(path string, fileChannel chan<- string, fileScans chan int, stats
Poll: Poll:
for { for {
select { select {
case e := <-errorChannel: case err := <-errorChannel:
return e return err
case <-done: case <-done:
break Poll break Poll
} }
@@ -326,8 +327,6 @@ func scanPaths(paths []string, sort string, index *fileIndex, formats *types.Typ
fileChannel := make(chan string) fileChannel := make(chan string)
errorChannel := make(chan error) errorChannel := make(chan error)
directoryScans := make(chan int, MaxDirScans)
fileScans := make(chan int, MaxFileScans)
done := make(chan bool, 1) done := make(chan bool, 1)
stats := &scanStats{ stats := &scanStats{
@@ -350,15 +349,13 @@ func scanPaths(paths []string, sort string, index *fileIndex, formats *types.Typ
for i := 0; i < len(paths); i++ { for i := 0; i < len(paths); i++ {
wg.Add(1) wg.Add(1)
directoryScans <- 1
go func(i int) { go func(i int) {
defer func() { defer func() {
wg.Done() wg.Done()
<-directoryScans
}() }()
err := walkPath(paths[i], fileChannel, statsChannels, formats)
err := walkPath(paths[i], fileChannel, fileScans, statsChannels, formats)
if err != nil { if err != nil {
errorChannel <- err errorChannel <- err

View File

@@ -12,7 +12,7 @@ import (
) )
const ( const (
ReleaseVersion string = "2.1.1" ReleaseVersion string = "2.2.0"
) )
var ( var (
@@ -30,8 +30,6 @@ var (
Index bool Index bool
IndexFile string IndexFile string
Info bool Info bool
MaxDirScans int
MaxFileScans int
MaxFileCount int MaxFileCount int
MinFileCount int MinFileCount int
PageLength int PageLength int
@@ -53,8 +51,6 @@ var (
Args: cobra.MinimumNArgs(1), Args: cobra.MinimumNArgs(1),
PreRunE: func(cmd *cobra.Command, args []string) error { PreRunE: func(cmd *cobra.Command, args []string) error {
switch { switch {
case MaxDirScans < 1 || MaxFileScans < 1 || MaxDirScans > math.MaxInt32 || MaxFileScans > math.MaxInt32:
return ErrInvalidScanCount
case MaxFileCount < 1 || MinFileCount < 1 || MaxFileCount > math.MaxInt32 || MinFileCount > math.MaxInt32: case MaxFileCount < 1 || MinFileCount < 1 || MaxFileCount > math.MaxInt32 || MinFileCount > math.MaxInt32:
return ErrInvalidFileCountValue return ErrInvalidFileCountValue
case MinFileCount > MaxFileCount: case MinFileCount > MaxFileCount:
@@ -98,8 +94,6 @@ func init() {
rootCmd.Flags().BoolVar(&Index, "index", false, "generate index of supported file paths at startup") rootCmd.Flags().BoolVar(&Index, "index", false, "generate index of supported file paths at startup")
rootCmd.Flags().StringVar(&IndexFile, "index-file", "", "path to optional persistent index file") rootCmd.Flags().StringVar(&IndexFile, "index-file", "", "path to optional persistent index file")
rootCmd.Flags().BoolVarP(&Info, "info", "i", false, "expose informational endpoints") rootCmd.Flags().BoolVarP(&Info, "info", "i", false, "expose informational endpoints")
rootCmd.Flags().IntVar(&MaxDirScans, "max-directory-scans", 32, "number of directories to scan at once")
rootCmd.Flags().IntVar(&MaxFileScans, "max-file-scans", 256, "number of files to scan at once")
rootCmd.Flags().IntVar(&MaxFileCount, "max-file-count", math.MaxInt32, "skip directories with file counts above this value") rootCmd.Flags().IntVar(&MaxFileCount, "max-file-count", math.MaxInt32, "skip directories with file counts above this value")
rootCmd.Flags().IntVar(&MinFileCount, "min-file-count", 1, "skip directories with file counts below this value") rootCmd.Flags().IntVar(&MinFileCount, "min-file-count", 1, "skip directories with file counts below this value")
rootCmd.Flags().IntVar(&PageLength, "page-length", 0, "pagination length for info pages") rootCmd.Flags().IntVar(&PageLength, "page-length", 0, "pagination length for info pages")