roulette/cmd/files.go

535 lines
10 KiB
Go
Raw Normal View History

/*
Copyright © 2023 Seednode <seednode@seedno.de>
*/
package cmd
import (
"errors"
"fmt"
2023-09-10 17:16:50 +00:00
"math/big"
"regexp"
2023-09-10 17:16:50 +00:00
"crypto/rand"
2022-09-08 15:57:59 +00:00
"os"
"path/filepath"
2022-10-23 21:29:58 +00:00
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
2022-09-08 15:57:59 +00:00
"time"
"seedno.de/seednode/roulette/types"
2022-09-08 15:57:59 +00:00
)
// maxConcurrency is the integer type of the scan concurrency limits below;
// scanPaths uses those limits as semaphore channel capacities.
type maxConcurrency int

// Upper bounds on simultaneous scans, chosen to avoid hitting default open
// file descriptor limits (1024).
const (
	maxDirectoryScans = maxConcurrency(32)
	maxFileScans      = maxConcurrency(256)
)
// regexes bundles the compiled patterns that are passed to the path helpers.
//
// filename is matched against a file path by split, which reads capture
// groups 1 through 3 as base name, sequence number and extension.
// alphanumeric is not used in this part of the file.
type regexes struct {
	alphanumeric, filename *regexp.Regexp
}
// concurrency holds the buffered channels used as counting semaphores while
// scanning: a value is sent before work starts and received once it is done,
// so each channel's capacity bounds the work in flight.
type concurrency struct {
	// directoryScans is acquired by scanPaths once per top-level path.
	directoryScans chan int
	// fileScans is acquired by scanPath once per file goroutine.
	fileScans chan int
}
// files is a list of file paths guarded by a mutex so that several scanning
// goroutines can add to it.
type files struct {
	mutex sync.RWMutex
	list  []string
}

// append adds path to the end of the list while holding the write lock.
func (f *files) append(path string) {
	f.mutex.Lock()
	defer f.mutex.Unlock()

	f.list = append(f.list, path)
}
// scanStats accumulates counters for a scan. Every field is an atomic
// counter, so the file goroutines started by scanPath can update them without
// extra locking.
type scanStats struct {
	// Files that passed / failed format validation.
	filesMatched, filesSkipped atomic.Uint32
	// Directories that were walked / pruned by the file-count limits.
	directoriesMatched, directoriesSkipped atomic.Uint32
}
// splitPath is a numbered file name broken into its parts, as produced by
// split: everything before the number, the number itself, and the extension.
type splitPath struct {
	base      string
	number    int
	extension string
}

// increment moves to the next sequence number.
func (p *splitPath) increment() {
	p.number++
}

// decrement moves to the previous sequence number.
func (p *splitPath) decrement() {
	p.number--
}
// humanReadableSize formats a byte count using decimal (1000-based) units.
//
// Values below 1000 are printed as whole bytes ("999 B"); larger values are
// scaled to the biggest unit that fits and printed with one decimal place
// ("1.5 KB", "1.0 MB", ...).
func humanReadableSize(bytes int) string {
	const (
		unit     = 1000
		prefixes = "KMGTPE"
	)

	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}

	// Find the largest power of unit that does not exceed bytes.
	divisor := int64(unit)
	magnitude := 0
	remaining := bytes / unit
	for remaining >= unit {
		remaining /= unit
		divisor *= unit
		magnitude++
	}

	scaled := float64(bytes) / float64(divisor)

	return fmt.Sprintf("%.1f %cB", scaled, prefixes[magnitude])
}
func preparePath(path string) string {
if runtime.GOOS == "windows" {
return fmt.Sprintf("%s/%s", mediaPrefix, filepath.ToSlash(path))
}
return mediaPrefix + path
2023-01-19 18:07:15 +00:00
}
func newFile(paths []string, filters *filters, sortOrder string, regexes *regexes, cache *fileCache, formats *types.Types) (string, error) {
path, err := pickFile(paths, filters, sortOrder, cache, formats)
if err != nil {
return "", nil
}
splitPath, err := split(path, regexes)
if err != nil {
return "", err
}
splitPath.number = 1
switch {
case sortOrder == "asc":
path, err = tryExtensions(splitPath, formats)
if err != nil {
return "", err
}
case sortOrder == "desc":
for {
splitPath.increment()
path, err = tryExtensions(splitPath, formats)
2022-10-18 21:46:55 +00:00
if err != nil {
return "", err
}
if path == "" {
splitPath.decrement()
path, err = tryExtensions(splitPath, formats)
if err != nil {
return "", err
}
2022-09-17 17:42:25 +00:00
break
}
}
}
return path, nil
}
// nextFile returns the neighbour of path within its numbered sequence.
//
// The path is split into base, number and extension; the number is moved one
// step up for "asc" or one step down for "desc", and the first existing file
// with that number (across the known extensions) is returned. For any other
// sortOrder, or when no such file exists, the result is "" with a nil error.
func nextFile(path, sortOrder string, regexes *regexes, formats *types.Types) (string, error) {
	parts, err := split(path, regexes)
	if err != nil {
		return "", err
	}

	switch sortOrder {
	case "asc":
		parts.increment()
	case "desc":
		parts.decrement()
	default:
		return "", nil
	}

	neighbour, err := tryExtensions(parts, formats)
	if err != nil {
		return "", err
	}

	return neighbour, nil
}
func split(path string, regexes *regexes) (*splitPath, error) {
p := splitPath{}
var err error
2022-10-18 21:46:55 +00:00
split := regexes.filename.FindAllStringSubmatch(path, -1)
if len(split) < 1 || len(split[0]) < 3 {
return &splitPath{}, nil
}
p.base = split[0][1]
p.number, err = strconv.Atoi(split[0][2])
if err != nil {
return &splitPath{}, err
}
p.extension = split[0][3]
return &p, nil
}
func tryExtensions(splitPath *splitPath, formats *types.Types) (string, error) {
var fileName string
for extension := range formats.Extensions {
fileName = fmt.Sprintf("%s%.3d%s", splitPath.base, splitPath.number, extension)
exists, err := fileExists(fileName)
if err != nil {
return "", err
}
2022-10-18 21:46:55 +00:00
if exists {
return fileName, nil
}
}
return "", nil
}
// fileExists reports whether path can be stat'ed.
//
// A "does not exist" failure is reported as (false, nil); any other stat
// failure is returned as (false, err).
func fileExists(path string) (bool, error) {
	if _, err := os.Stat(path); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return false, nil
		}

		return false, err
	}

	return true, nil
}
func pathIsValid(path string, paths []string) bool {
var matchesPrefix = false
for i := 0; i < len(paths); i++ {
if strings.HasPrefix(path, paths[i]) {
matchesPrefix = true
}
}
2022-10-25 05:06:57 +00:00
switch {
case Verbose && !matchesPrefix:
fmt.Printf("%s | Error: Failed to serve file outside specified path(s): %s\n",
time.Now().Format(logDate),
path,
)
return false
2022-10-25 05:06:57 +00:00
case !matchesPrefix:
return false
default:
return true
}
}
func pathHasSupportedFiles(path string, formats *types.Types) (bool, error) {
hasRegisteredFiles := make(chan bool, 1)
2023-04-11 09:44:18 +00:00
err := filepath.WalkDir(path, func(p string, info os.DirEntry, err error) error {
if err != nil {
return err
}
switch {
case !Recursive && info.IsDir() && p != path:
2023-04-11 09:44:18 +00:00
return filepath.SkipDir
case !info.IsDir() && formats.Validate(p):
hasRegisteredFiles <- true
2023-09-14 22:37:22 +00:00
return filepath.SkipAll
2023-04-11 09:44:18 +00:00
}
return err
})
if err != nil {
return false, err
}
select {
case <-hasRegisteredFiles:
2023-04-11 09:44:18 +00:00
return true, nil
default:
return false, nil
}
}
// pathCount counts the direct children of the directory at path.
//
// It returns the number of non-directory entries first and the number of
// directory entries second. If the directory cannot be read, both counts are
// zero and the error is returned.
func pathCount(path string) (uint32, uint32, error) {
	entries, err := os.ReadDir(path)
	if err != nil {
		return 0, 0, err
	}

	var fileCount, directoryCount uint32

	for _, entry := range entries {
		switch {
		case entry.IsDir():
			directoryCount++
		default:
			fileCount++
		}
	}

	return fileCount, directoryCount, nil
}
func scanPath(path string, files *files, stats *scanStats, concurrency *concurrency, formats *types.Types) error {
var wg sync.WaitGroup
err := filepath.WalkDir(path, func(p string, info os.DirEntry, err error) error {
if err != nil {
return err
}
2022-09-08 20:30:51 +00:00
switch {
case !Recursive && info.IsDir() && p != path:
return filepath.SkipDir
case !info.IsDir():
wg.Add(1)
concurrency.fileScans <- 1
go func() {
defer func() {
<-concurrency.fileScans
2023-06-03 23:45:32 +00:00
wg.Done()
}()
path, err := normalizePath(p)
if err != nil {
fmt.Println(err)
}
if !formats.Validate(path) {
stats.filesSkipped.Add(1)
return
}
files.append(path)
stats.filesMatched.Add(1)
}()
case info.IsDir():
files, directories, err := pathCount(p)
if err != nil {
fmt.Println(err)
}
if files > 0 && (files < MinimumFileCount) || (files > MaximumFileCount) {
// This count will not otherwise include the parent directory itself, so increment by one
2023-08-13 22:29:28 +00:00
stats.directoriesSkipped.Add(directories + 1)
stats.filesSkipped.Add(files)
return filepath.SkipDir
}
stats.directoriesMatched.Add(1)
2022-09-08 17:12:58 +00:00
}
2022-09-08 17:12:58 +00:00
return err
})
wg.Wait()
if err != nil {
return err
}
2022-09-08 17:12:58 +00:00
return nil
2022-09-08 17:12:58 +00:00
}
func scanPaths(paths []string, sort string, cache *fileCache, formats *types.Types) []string {
files := &files{
mutex: sync.RWMutex{},
list: []string{},
}
stats := &scanStats{
filesMatched: atomic.Uint32{},
filesSkipped: atomic.Uint32{},
directoriesMatched: atomic.Uint32{},
directoriesSkipped: atomic.Uint32{},
}
2022-09-08 15:57:59 +00:00
concurrency := &concurrency{
directoryScans: make(chan int, maxDirectoryScans),
fileScans: make(chan int, maxFileScans),
}
var wg sync.WaitGroup
startTime := time.Now()
for i := 0; i < len(paths); i++ {
wg.Add(1)
concurrency.directoryScans <- 1
go func(i int) {
defer func() {
<-concurrency.directoryScans
2023-06-03 23:45:32 +00:00
wg.Done()
}()
err := scanPath(paths[i], files, stats, concurrency, formats)
if err != nil {
fmt.Println(err)
}
}(i)
}
wg.Wait()
2022-09-08 17:12:58 +00:00
if stats.filesMatched.Load() < 1 {
fmt.Println("No files matched")
return []string{}
}
if Verbose {
fmt.Printf("%s | Indexed %d/%d files across %d/%d directories in %s\n",
time.Now().Format(logDate),
stats.filesMatched.Load(),
stats.filesMatched.Load()+stats.filesSkipped.Load(),
stats.directoriesMatched.Load(),
stats.directoriesMatched.Load()+stats.directoriesSkipped.Load(),
time.Since(startTime),
)
}
return files.list
2022-09-08 20:30:51 +00:00
}
// fileList returns the files available for serving, after filtering.
//
// With Cache disabled the paths are scanned on every call. With Cache
// enabled the cached list is used, and an empty cache is first populated from
// a fresh scan. If filters is not empty it is applied to the resulting list.
//
// The cache state is inspected exactly once per call, so a concurrent cache
// fill cannot make a single call take two contradictory decisions.
func fileList(paths []string, filters *filters, sort string, cache *fileCache, formats *types.Types) []string {
	var list []string

	switch {
	case !Cache:
		list = scanPaths(paths, sort, cache, formats)
	case cache.isEmpty():
		cache.set(scanPaths(paths, sort, cache, formats))
		list = cache.List()
	default:
		list = cache.List()
	}

	if filters.isEmpty() {
		return list
	}

	return filters.apply(list)
}
func pickFile(args []string, filters *filters, sort string, cache *fileCache, formats *types.Types) (string, error) {
list := fileList(args, filters, sort, cache, formats)
fileCount := len(list)
2023-04-10 20:53:01 +00:00
if fileCount < 1 {
return "", ErrNoMediaFound
}
r, err := rand.Int(rand.Reader, big.NewInt(int64(fileCount)))
2023-09-10 17:16:50 +00:00
if err != nil {
return "", err
}
val, err := strconv.Atoi(strconv.FormatInt(r.Int64(), 10))
if err != nil {
return "", err
}
return list[val], nil
}
// normalizePath turns path into an absolute path with symlinks resolved.
//
// A path that is exactly "~" or starts with "~/" is first expanded against
// the current user's home directory; the home directory is only looked up in
// that case, so other paths work even when no home directory is defined.
//
// An error is returned if the home directory is needed but unavailable, if
// symlink resolution fails (for instance because the path does not exist),
// or if the absolute path cannot be determined.
func normalizePath(path string) (string, error) {
	if path == "~" || strings.HasPrefix(path, "~/") {
		homeDir, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}

		if path == "~" {
			path = homeDir
		} else {
			path = filepath.Join(homeDir, path[2:])
		}
	}

	resolved, err := filepath.EvalSymlinks(path)
	if err != nil {
		return "", err
	}

	absolutePath, err := filepath.Abs(resolved)
	if err != nil {
		return "", err
	}

	return absolutePath, nil
}
func validatePaths(args []string, formats *types.Types) ([]string, error) {
var paths []string
2023-05-12 04:22:31 +00:00
var pathList strings.Builder
pathList.WriteString("Paths:\n")
for i := 0; i < len(args); i++ {
path, err := normalizePath(args[i])
2022-10-25 05:06:57 +00:00
if err != nil {
return nil, err
2022-10-25 05:06:57 +00:00
}
pathMatches := (args[i] == path)
hasSupportedFiles, err := pathHasSupportedFiles(path, formats)
2023-04-11 09:44:18 +00:00
if err != nil {
return nil, err
}
var addPath bool = false
switch {
case pathMatches && hasSupportedFiles:
2023-05-12 04:22:31 +00:00
pathList.WriteString(fmt.Sprintf("%s\n", args[i]))
2023-04-11 09:44:18 +00:00
addPath = true
case !pathMatches && hasSupportedFiles:
2023-05-12 04:22:31 +00:00
pathList.WriteString(fmt.Sprintf("%s (resolved to %s)\n", args[i], path))
2023-04-11 09:44:18 +00:00
addPath = true
case pathMatches && !hasSupportedFiles:
2023-05-12 04:22:31 +00:00
pathList.WriteString(fmt.Sprintf("%s [No supported files found]\n", args[i]))
case !pathMatches && !hasSupportedFiles:
2023-05-12 04:22:31 +00:00
pathList.WriteString(fmt.Sprintf("%s (resolved to %s) [No supported files found]\n", args[i], path))
2022-10-25 05:06:57 +00:00
}
2023-04-11 09:44:18 +00:00
if addPath {
paths = append(paths, path)
2023-04-11 09:44:18 +00:00
}
2022-09-08 20:30:51 +00:00
}
2023-05-12 04:22:31 +00:00
if len(paths) > 0 {
fmt.Println(pathList.String())
}
return paths, nil
2022-09-08 15:57:59 +00:00
}