AdGuardHome/internal/aghos/filewalker.go
Eugene Burkov 2796e65468 Pull request: 2499 merge rewrites vol.1
Merge in DNS/adguard-home from 2499-merge-rewrites-vol.1 to master

Updates #2499.

Squashed commit of the following:

commit 6b308bc2b360cee8c22e506f31d62bacb4bf8fb3
Merge: f49e9186 2b635bf6
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Thu Oct 14 19:23:07 2021 +0300

    Merge branch 'master' into 2499-merge-rewrites-vol.1

commit f49e9186ffc8b7074d03c6721ee56cdb09243684
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Thu Oct 14 18:50:49 2021 +0300

    aghos: fix fs events filtering

commit 567dd646556606212af5dab60e3ecbb8fff22c25
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Thu Oct 14 16:50:37 2021 +0300

    all: imp code, docs, fix windows

commit 140c8bf519345eb54d0e7500a996fcf465353d71
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Wed Oct 13 19:41:53 2021 +0300

    aghnet: use const

commit bebf3f76bd394a498ccad812c57d4507c69529ba
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Wed Oct 13 19:32:37 2021 +0300

    all: imp tests, docs

commit 9bfdbb6eb454833135d616e208e82699f98e2562
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Wed Oct 13 18:42:20 2021 +0300

    all: imp path more, imp docs

commit ee9ea4c132a6b17787d150bf2bee703abaa57be3
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Wed Oct 13 16:09:46 2021 +0300

    all: fix windows, imp paths

commit 6fac8338a81e9ecfebfc23a1adcb964e89f6aee6
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Oct 11 19:53:35 2021 +0300

    all: imp code, docs

commit da1ce1a2a3dd2be3fdff2412a6dbd596859dc249
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Oct 11 18:22:50 2021 +0300

    aghnet: fix windows tests

commit d29de359ed68118d71efb226a8433fac15ff5c66
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Fri Oct 8 21:02:14 2021 +0300

    all: repl & imp

commit 1356c08944cdbb85ce5532d90fe5b077219ce5ff
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Fri Oct 8 01:41:19 2021 +0300

    all: add tests, mv logic, added tmpfs

commit f4b11adf8998bc8d9d955c5ac9f386f671bd5213
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Thu Oct 7 14:26:30 2021 +0300

    all: imp filewalker, refactor hosts container
2021-10-14 19:39:21 +03:00

135 lines
3.5 KiB
Go

package aghos
import (
"fmt"
"io"
"io/fs"
"github.com/AdguardTeam/AdGuardHome/internal/aghio"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/stringutil"
)
// FileWalker is the signature of a function called for files in the file tree.
// As opposed to filepath.Walk it only walk the files (not directories) matching
// the provided pattern and those returned by function itself. All patterns
// should be valid for fs.Glob. If cont is false, the walking terminates. Each
// opened file is also limited for reading to MaxWalkedFileSize.
//
// TODO(e.burkov, a.garipov): Move into another package like aghfs.
//
// TODO(e.burkov): Think about passing filename or any additional data.
type FileWalker func(r io.Reader) (patterns []string, cont bool, err error)
// MaxWalkedFileSize is the maximum length of the file that FileWalker can
// check.
const MaxWalkedFileSize = 1024 * 1024
// checkFile tries to open and process a single file located on sourcePath in
// the specified fsys. The path is skipped if it's a directory.
func checkFile(
fsys fs.FS,
c FileWalker,
sourcePath string,
) (patterns []string, cont bool, err error) {
var f fs.File
f, err = fsys.Open(sourcePath)
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
// Ignore non-existing files since this may only happen when the
// file was removed after filepath.Glob matched it.
return nil, true, nil
}
return nil, false, err
}
defer func() { err = errors.WithDeferred(err, f.Close()) }()
var fi fs.FileInfo
if fi, err = f.Stat(); err != nil {
return nil, true, err
}
if fi.IsDir() {
// Skip the directories.
return nil, true, nil
}
var r io.Reader
// Ignore the error since LimitReader function returns error only if passed
// limit value is less than zero, but the constant used.
//
// TODO(e.burkov): Make variable.
r, _ = aghio.LimitReader(f, MaxWalkedFileSize)
return c(r)
}
// handlePatterns parses the patterns in fsys and ignores duplicates using
// srcSet. srcSet must be non-nil.
func handlePatterns(
fsys fs.FS,
srcSet *stringutil.Set,
patterns ...string,
) (sub []string, err error) {
sub = make([]string, 0, len(patterns))
for _, p := range patterns {
var matches []string
matches, err = fs.Glob(fsys, p)
if err != nil {
// Enrich error with the pattern because filepath.Glob
// doesn't do it.
return nil, fmt.Errorf("invalid pattern %q: %w", p, err)
}
for _, m := range matches {
if srcSet.Has(m) {
continue
}
srcSet.Add(m)
sub = append(sub, m)
}
}
return sub, nil
}
// Walk starts walking the files in fsys defined by patterns from initial.
// It only returns true if fw signed to stop walking.
func (fw FileWalker) Walk(fsys fs.FS, initial ...string) (ok bool, err error) {
// The slice of sources keeps the order in which the files are walked since
// srcSet.Values() returns strings in undefined order.
srcSet := stringutil.NewSet()
var src []string
src, err = handlePatterns(fsys, srcSet, initial...)
if err != nil {
return false, err
}
var filename string
defer func() { err = errors.Annotate(err, "checking %q: %w", filename) }()
for i := 0; i < len(src); i++ {
var patterns []string
var cont bool
filename = src[i]
patterns, cont, err = checkFile(fsys, fw, src[i])
if err != nil {
return false, err
}
if !cont {
return true, nil
}
var subsrc []string
subsrc, err = handlePatterns(fsys, srcSet, patterns...)
if err != nil {
return false, err
}
src = append(src, subsrc...)
}
return false, nil
}