Fix scan stats to be both consistent and actually accurate

This commit is contained in:
Seednode 2023-09-16 18:04:44 -05:00
parent 16d1428a52
commit 280656993d
2 changed files with 59 additions and 45 deletions

View File

@ -35,11 +35,6 @@ type regexes struct {
filename *regexp.Regexp filename *regexp.Regexp
} }
type concurrency struct {
directoryScans chan int
fileScans chan int
}
type scanStats struct { type scanStats struct {
filesMatched int filesMatched int
filesSkipped int filesSkipped int
@ -47,6 +42,13 @@ type scanStats struct {
directoriesSkipped int directoriesSkipped int
} }
type scanStatsChannels struct {
filesMatched chan int
filesSkipped chan int
directoriesMatched chan int
directoriesSkipped chan int
}
type splitPath struct { type splitPath struct {
base string base string
number int number int
@ -295,49 +297,42 @@ func pathCount(path string) (int, int, error) {
return files, directories, nil return files, directories, nil
} }
func scanPath(path string, fileChannel chan<- string, statChannel chan<- *scanStats, errorChannel chan<- error, concurrency *concurrency, formats *types.Types) { func scanPath(path string, fileChannel chan<- string, fileScans chan int, stats *scanStatsChannels, formats *types.Types) error {
var wg sync.WaitGroup var wg sync.WaitGroup
stats := &scanStats{ errorChannel := make(chan error)
filesMatched: 0, done := make(chan bool, 1)
filesSkipped: 0,
directoriesMatched: 0,
directoriesSkipped: 0,
}
err := filepath.WalkDir(path, func(p string, info os.DirEntry, err error) error {
if err != nil {
return err
}
filepath.WalkDir(path, func(p string, info os.DirEntry, err error) error {
switch { switch {
case !Recursive && info.IsDir() && p != path: case !Recursive && info.IsDir() && p != path:
return filepath.SkipDir return filepath.SkipDir
case !info.IsDir(): case !info.IsDir():
wg.Add(1) wg.Add(1)
concurrency.fileScans <- 1 fileScans <- 1
go func() { go func() {
defer func() { defer func() {
<-concurrency.fileScans
wg.Done() wg.Done()
<-fileScans
}() }()
path, err := normalizePath(p) path, err := normalizePath(p)
if err != nil { if err != nil {
errorChannel <- err errorChannel <- err
return
} }
if !formats.Validate(path) { if !formats.Validate(path) {
stats.filesSkipped = stats.filesSkipped + 1 stats.filesSkipped <- 1
return return
} }
fileChannel <- path fileChannel <- path
stats.filesMatched = stats.filesMatched + 1 stats.filesMatched <- 1
}() }()
case info.IsDir(): case info.IsDir():
files, directories, err := pathCount(p) files, directories, err := pathCount(p)
@ -347,33 +342,43 @@ func scanPath(path string, fileChannel chan<- string, statChannel chan<- *scanSt
if files > 0 && (files < int(MinimumFileCount)) || (files > int(MaximumFileCount)) { if files > 0 && (files < int(MinimumFileCount)) || (files > int(MaximumFileCount)) {
// This count will not otherwise include the parent directory itself, so increment by one // This count will not otherwise include the parent directory itself, so increment by one
stats.directoriesSkipped = stats.directoriesSkipped + directories + 1 stats.directoriesSkipped <- directories + 1
stats.filesSkipped = stats.filesSkipped + files stats.filesSkipped <- files
return filepath.SkipDir return filepath.SkipDir
} }
stats.directoriesMatched = stats.directoriesMatched + 1 stats.directoriesMatched <- 1
} }
return err return nil
}) })
wg.Wait() go func() {
wg.Wait()
done <- true
}()
statChannel <- stats Poll:
for {
if err != nil { select {
errorChannel <- err case e := <-errorChannel:
return e
case <-done:
break Poll
}
} }
return nil
} }
func scanPaths(paths []string, sort string, cache *fileCache, formats *types.Types) ([]string, error) { func scanPaths(paths []string, sort string, cache *fileCache, formats *types.Types) ([]string, error) {
var list []string var list []string
fileChannel := make(chan string) fileChannel := make(chan string)
statChannel := make(chan *scanStats)
errorChannel := make(chan error) errorChannel := make(chan error)
directoryScans := make(chan int, maxDirectoryScans)
fileScans := make(chan int, maxFileScans)
done := make(chan bool, 1) done := make(chan bool, 1)
stats := &scanStats{ stats := &scanStats{
@ -383,9 +388,11 @@ func scanPaths(paths []string, sort string, cache *fileCache, formats *types.Typ
directoriesSkipped: 0, directoriesSkipped: 0,
} }
concurrency := &concurrency{ statsChannels := &scanStatsChannels{
directoryScans: make(chan int, maxDirectoryScans), filesMatched: make(chan int),
fileScans: make(chan int, maxFileScans), filesSkipped: make(chan int),
directoriesMatched: make(chan int),
directoriesSkipped: make(chan int),
} }
var wg sync.WaitGroup var wg sync.WaitGroup
@ -394,16 +401,20 @@ func scanPaths(paths []string, sort string, cache *fileCache, formats *types.Typ
for i := 0; i < len(paths); i++ { for i := 0; i < len(paths); i++ {
wg.Add(1) wg.Add(1)
concurrency.directoryScans <- 1 directoryScans <- 1
go func(i int) { go func(i int) {
defer func() { defer func() {
<-concurrency.directoryScans
wg.Done() wg.Done()
<-directoryScans
}() }()
scanPath(paths[i], fileChannel, statChannel, errorChannel, concurrency, formats) err := scanPath(paths[i], fileChannel, fileScans, statsChannels, formats)
if err != nil {
errorChannel <- err
return
}
}(i) }(i)
} }
@ -417,11 +428,14 @@ Poll:
select { select {
case p := <-fileChannel: case p := <-fileChannel:
list = append(list, p) list = append(list, p)
case s := <-statChannel: case s := <-statsChannels.filesMatched:
stats.filesMatched = stats.filesMatched + s.filesMatched stats.filesMatched = stats.filesMatched + s
stats.filesSkipped = stats.filesSkipped + s.filesSkipped case s := <-statsChannels.filesSkipped:
stats.directoriesMatched = stats.directoriesMatched + s.directoriesMatched stats.filesSkipped = stats.filesSkipped + s
stats.directoriesSkipped = stats.directoriesSkipped + s.directoriesSkipped case s := <-statsChannels.directoriesMatched:
stats.directoriesMatched = stats.directoriesMatched + s
case s := <-statsChannels.directoriesSkipped:
stats.directoriesSkipped = stats.directoriesSkipped + s
case e := <-errorChannel: case e := <-errorChannel:
return []string{}, e return []string{}, e
case <-done: case <-done:

View File

@ -11,7 +11,7 @@ import (
) )
const ( const (
ReleaseVersion string = "0.90.3" ReleaseVersion string = "0.90.4"
) )
var ( var (