AdGuardHome/home/filter.go

578 lines
14 KiB
Go
Raw Normal View History

package home
import (
"fmt"
"hash/crc32"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/AdguardTeam/AdGuardHome/dnsfilter"
2019-03-06 12:20:34 +03:00
"github.com/AdguardTeam/golibs/file"
"github.com/AdguardTeam/golibs/log"
)
// Package-level state shared by the filter management functions in this file.
var (
// nextFilterID is the value assignUniqueFilterID hands out next.
nextFilterID = time.Now().Unix() // semi-stable way to generate an unique ID
// filterTitleRegexp captures the text that follows "! Title:" on a filter line.
filterTitleRegexp = regexp.MustCompile(`^! Title: +(.*)$`)
// refreshStatus tells whether a filters refresh is currently running.
refreshStatus uint32 // 0:none; 1:in progress
// refreshLock is held around every call to refreshFiltersIfNecessary.
refreshLock sync.Mutex
)
func initFiltering() {
loadFilters()
deduplicateFilters()
updateUniqueFilterID(config.Filters)
2019-10-17 14:33:38 +03:00
}
// startFiltering launches the background goroutine that periodically
// refreshes the filters (see periodicallyRefreshFilters).
func startFiltering() {
// Here we should start updating filters,
// but currently we can't wake up the periodic task to do so.
// So for now we just start this periodic task from here.
go periodicallyRefreshFilters()
}
// defaultFilters returns the built-in set of filter lists.
// They carry the fixed IDs 1..4 and only the first one is enabled.
func defaultFilters() []filter {
	defaults := []filter{
		{
			Name:    "AdGuard Simplified Domain Names filter",
			URL:     "https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt",
			Enabled: true,
			Filter:  dnsfilter.Filter{ID: 1},
		},
		{
			Name:    "AdAway",
			URL:     "https://adaway.org/hosts.txt",
			Enabled: false,
			Filter:  dnsfilter.Filter{ID: 2},
		},
		{
			Name:    "hpHosts - Ad and Tracking servers only",
			URL:     "https://hosts-file.net/ad_servers.txt",
			Enabled: false,
			Filter:  dnsfilter.Filter{ID: 3},
		},
		{
			Name:    "MalwareDomainList.com Hosts List",
			URL:     "https://www.malwaredomainlist.com/hostslist/hosts.txt",
			Enabled: false,
			Filter:  dnsfilter.Filter{ID: 4},
		},
	}
	return defaults
}
// filter describes a single filter list together with its runtime state.
// field ordering is important -- yaml fields will mirror ordering from here
type filter struct {
// Enabled tells whether the filter is in use; disabled filters are not loaded from disk.
Enabled bool
// URL is the address the filter contents are downloaded from.
URL string
// Name is the display name; update() replaces it with the "! Title:" value when one is present.
Name string `yaml:"name"`
// RulesCount is the number of rule lines counted in the contents (excluded from YAML).
RulesCount int `yaml:"-"`
// LastUpdated is the time of the last update (excluded from YAML).
LastUpdated time.Time `yaml:"-"`
checksum uint32 // checksum of the file data
// Embedded dnsfilter data; this file uses its ID and Data fields.
dnsfilter.Filter `yaml:",inline"`
}
// userFilter builds a helper filter object for the user rules.
// The rules from the configuration are joined with newlines and stored as the
// filter data; the ID is left at zero.
func userFilter() filter {
	rules := strings.Join(config.UserRules, "\n")

	// User filter always has constant ID=0
	uf := filter{Enabled: true}
	uf.Filter.Data = []byte(rules)
	return uf
}
// Bit flags returned by filterSetProperties.
const (
// a filter with the requested URL was found
statusFound = 1
// the Enabled flag of the filter was changed
statusEnabledChanged = 2
// the URL of the filter was changed
statusURLChanged = 4
// the new URL is already used by a filter; returned on its own, without other flags
statusURLExists = 8
)
// filterSetProperties updates the properties of the filter specified by its
// current URL, taking the new values from newf (Name, URL and Enabled).
//
// Return value (status* flags):
// . 0 - there is no filter with this URL
// . statusURLExists - the new URL is already used by a filter;
//   in this case the filter is left completely untouched
// . statusFound, optionally combined with statusURLChanged and
//   statusEnabledChanged - the filter was found and updated
func filterSetProperties(url string, newf filter) int {
	r := 0
	config.Lock()
	defer config.Unlock()

	for i := range config.Filters {
		f := &config.Filters[i]
		if f.URL != url {
			continue
		}

		log.Debug("filter: set properties: %s: {%s %s %v}",
			f.URL, newf.Name, newf.URL, newf.Enabled)

		// Validate the request before changing anything, so that a rejected
		// update doesn't leave the filter renamed.
		urlChanged := f.URL != newf.URL
		if urlChanged && filterExistsNoLock(newf.URL) {
			return statusURLExists
		}

		f.Name = newf.Name

		if urlChanged {
			r |= statusURLChanged
			f.URL = newf.URL
			// The data we have belongs to the old URL: drop it and
			// reset the update time so the new URL gets downloaded.
			f.unload()
			f.LastUpdated = time.Time{}
		}

		if f.Enabled != newf.Enabled {
			r |= statusEnabledChanged
			f.Enabled = newf.Enabled
			if f.Enabled {
				// With a changed URL there is nothing valid on disk to load.
				if (r & statusURLChanged) == 0 {
					e := f.load()
					if e != nil {
						// This isn't a fatal error,
						// because it may occur when someone removes the file from disk.
						// In this case the periodic update task will try to download the file.
						f.LastUpdated = time.Time{}
					}
				}
			} else {
				f.unload()
			}
		}

		return r | statusFound
	}
	return 0
}
// filterExists reports whether a filter with this URL is configured.
// The configuration is read-locked for the duration of the lookup.
func filterExists(url string) bool {
	config.RLock()
	defer config.RUnlock()

	return filterExistsNoLock(url)
}
// filterExistsNoLock reports whether a filter with this URL is configured.
// It takes no lock itself.
func filterExistsNoLock(url string) bool {
	for i := range config.Filters {
		if config.Filters[i].URL == url {
			return true
		}
	}
	return false
}
// filterAdd appends the filter to the configuration under the config lock.
// It returns false, leaving the list unchanged, if a filter with the same URL
// is already present.
func filterAdd(f filter) bool {
	config.Lock()
	defer config.Unlock()

	// Refuse duplicates
	for i := range config.Filters {
		if config.Filters[i].URL == f.URL {
			return false
		}
	}

	config.Filters = append(config.Filters, f)
	return true
}
2019-03-15 19:41:45 +03:00
// Load filters from the disk
// And if any filter has zero ID, assign a new one
func loadFilters() {
for i := range config.Filters {
filter := &config.Filters[i] // otherwise we're operating on a copy
if filter.ID == 0 {
filter.ID = assignUniqueFilterID()
}
if !filter.Enabled {
// No need to load a filter that is not enabled
continue
}
2019-03-15 19:41:45 +03:00
err := filter.load()
if err != nil {
log.Error("Couldn't load filter %d contents due to %s", filter.ID, err)
2019-03-15 19:41:45 +03:00
}
}
}
// deduplicateFilters removes filters whose URL has already been seen earlier
// in the list. The first occurrence wins and the relative order is kept.
func deduplicateFilters() {
	seen := make(map[string]bool)

	// Filter in place: kept entries are compacted to the front of the
	// same backing array.
	kept := config.Filters[:0]
	for _, f := range config.Filters {
		if seen[f.URL] {
			continue
		}
		seen[f.URL] = true
		kept = append(kept, f)
	}

	config.Filters = kept
}
// updateUniqueFilterID makes sure the next filter ID is greater than the ID of
// every filter in the list, i.e. at least max(filter.ID) + 1.
// The counter is never decreased.
func updateUniqueFilterID(filters []filter) {
	for i := range filters {
		// ">=" and not ">": if the counter is equal to an existing ID,
		// the next assigned ID would duplicate that filter's ID.
		if filters[i].ID >= nextFilterID {
			nextFilterID = filters[i].ID + 1
		}
	}
}
func assignUniqueFilterID() int64 {
value := nextFilterID
2019-01-24 20:11:01 +03:00
nextFilterID++
return value
}
// Sets up a timer that will be checking for filters updates periodically
func periodicallyRefreshFilters() {
const maxInterval = 1 * 60 * 60
intval := 5 // use a dynamically increasing time interval
nUpdated := 0
for {
isNetworkErr := false
2019-10-17 14:33:38 +03:00
if config.DNS.FiltersUpdateIntervalHours != 0 && refreshStatus == 0 {
refreshStatus = 1
refreshLock.Lock()
nUpdated, isNetworkErr = refreshFiltersIfNecessary(false)
refreshLock.Unlock()
refreshStatus = 0
if nUpdated != 0 {
intval = maxInterval
}
}
if isNetworkErr {
intval *= 2
if intval > maxInterval {
intval = maxInterval
}
}
time.Sleep(time.Duration(intval) * time.Second)
}
}
// refreshFilters forces an update check of all enabled filters.
// It returns the number of updated filters, or an error if another refresh is
// already in progress.
func refreshFilters() (int, error) {
	// we could use atomic cmpxchg here, but it's not really required
	if refreshStatus != 0 {
		return 0, fmt.Errorf("Filters update procedure is already running")
	}

	refreshStatus = 1
	refreshLock.Lock()
	updatedCount, _ := refreshFiltersIfNecessary(true)
	refreshLock.Unlock()
	refreshStatus = 0

	return updatedCount, nil
}
// Checks filters updates if necessary
// If force is true, it ignores the filter.LastUpdated field value
//
// Algorithm:
// . Get the list of filters to be updated
// . For each filter run the download and checksum check operation
// . For each filter:
// . If filter data hasn't changed, just set new update time on file
2019-10-21 19:49:56 +03:00
// . If filter data has changed:
// . rename the old file (1.txt -> 1.txt.old)
// . store the new data on disk (1.txt)
// . Pass new filters to dnsfilter object - it analyzes new data while the old filters are still active
// . dnsfilter activates new filters
// . Remove the old filter files (1.txt.old)
//
// Return the number of updated filters
// Return TRUE - there was a network error and nothing could be updated
func refreshFiltersIfNecessary(force bool) (int, bool) {
var updateFilters []filter
var updateFlags []bool // 'true' if filter data has changed
log.Debug("Filters: updating...")
now := time.Now()
config.RLock()
for i := range config.Filters {
f := &config.Filters[i] // otherwise we will be operating on a copy
if !f.Enabled {
continue
}
expireTime := f.LastUpdated.Unix() + int64(config.DNS.FiltersUpdateIntervalHours)*60*60
if !force && expireTime > now.Unix() {
continue
}
var uf filter
uf.ID = f.ID
uf.URL = f.URL
uf.Name = f.Name
uf.checksum = f.checksum
updateFilters = append(updateFilters, uf)
}
config.RUnlock()
nfail := 0
for i := range updateFilters {
uf := &updateFilters[i]
updated, err := uf.update()
2019-07-16 15:29:36 +03:00
updateFlags = append(updateFlags, updated)
if err != nil {
nfail++
log.Printf("Failed to update filter %s: %s\n", uf.URL, err)
continue
}
uf.LastUpdated = now
}
if nfail == len(updateFilters) {
return 0, true
}
updateCount := 0
for i := range updateFilters {
uf := &updateFilters[i]
updated := updateFlags[i]
if updated {
err := uf.saveAndBackupOld()
if err != nil {
log.Printf("Failed to save the updated filter %d: %s", uf.ID, err)
continue
}
} else {
e := os.Chtimes(uf.Path(), uf.LastUpdated, uf.LastUpdated)
if e != nil {
log.Error("os.Chtimes(): %v", e)
}
}
config.Lock()
for k := range config.Filters {
f := &config.Filters[k]
if f.ID != uf.ID || f.URL != uf.URL {
continue
}
f.LastUpdated = uf.LastUpdated
if !updated {
continue
}
log.Info("Updated filter #%d. Rules: %d -> %d",
f.ID, f.RulesCount, uf.RulesCount)
f.Name = uf.Name
f.Data = nil
f.RulesCount = uf.RulesCount
f.checksum = uf.checksum
updateCount++
}
config.Unlock()
}
if updateCount != 0 {
enableFilters(false)
for i := range updateFilters {
uf := &updateFilters[i]
updated := updateFlags[i]
if !updated {
continue
}
_ = os.Remove(uf.Path() + ".old")
2019-01-24 20:11:01 +03:00
}
}
log.Debug("Filters: update finished")
return updateCount, false
}
// isPrintableText reports whether data contains no control bytes other than
// CR, LF and TAB. Every byte >= 0x20 except DEL (0x7f) is accepted, which
// includes the bytes of multi-byte UTF-8 sequences (they are not validated).
func isPrintableText(data []byte) bool {
	for _, b := range data {
		switch b {
		case '\n', '\r', '\t':
			// allowed whitespace control characters
			continue
		}
		if b < ' ' || b == 0x7f {
			return false
		}
	}
	return true
}
// A helper function that parses filter contents and returns a number of rules and a filter name (if there's any)
func parseFilterContents(contents []byte) (int, string) {
2019-10-07 19:18:30 +03:00
data := string(contents)
rulesCount := 0
name := ""
seenTitle := false
// Count lines in the filter
2019-10-07 19:18:30 +03:00
for len(data) != 0 {
line := SplitNext(&data, '\n')
2019-03-15 16:02:48 +03:00
if len(line) == 0 {
continue
}
if line[0] == '!' {
m := filterTitleRegexp.FindAllStringSubmatch(line, -1)
if len(m) > 0 && len(m[0]) >= 2 && !seenTitle {
name = m[0][1]
seenTitle = true
}
2019-03-15 16:02:48 +03:00
} else {
rulesCount++
}
}
return rulesCount, name
}
// Perform upgrade on a filter
func (filter *filter) update() (bool, error) {
2019-02-12 19:22:17 +03:00
log.Tracef("Downloading update for filter %d from %s", filter.ID, filter.URL)
resp, err := config.client.Get(filter.URL)
if resp != nil && resp.Body != nil {
defer resp.Body.Close()
}
if err != nil {
log.Printf("Couldn't request filter from URL %s, skipping: %s", filter.URL, err)
return false, err
}
if resp.StatusCode != 200 {
log.Printf("Got status code %d from URL %s, skipping", resp.StatusCode, filter.URL)
return false, fmt.Errorf("got status code != 200: %d", resp.StatusCode)
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Printf("Couldn't fetch filter contents from URL %s, skipping: %s", filter.URL, err)
return false, err
}
// Check if the filter has been really changed
checksum := crc32.ChecksumIEEE(body)
if filter.checksum == checksum {
2019-02-12 19:22:17 +03:00
log.Tracef("Filter #%d at URL %s hasn't changed, not updating it", filter.ID, filter.URL)
return false, nil
}
var firstChunk []byte
if len(body) <= 4096 {
firstChunk = body
} else {
firstChunk = body[:4096]
}
if !isPrintableText(firstChunk) {
return false, fmt.Errorf("Data contains non-printable characters")
}
s := strings.ToLower(string(firstChunk))
if strings.Index(s, "<html") >= 0 ||
strings.Index(s, "<!doctype") >= 0 {
return false, fmt.Errorf("Data is HTML, not plain text")
}
// Extract filter name and count number of rules
rulesCount, filterName := parseFilterContents(body)
log.Printf("Filter %d has been updated: %d bytes, %d rules", filter.ID, len(body), rulesCount)
if filterName != "" {
filter.Name = filterName
}
filter.RulesCount = rulesCount
filter.Data = body
filter.checksum = checksum
return true, nil
}
// save writes the filter data to the filter's file (see Path) using
// file.SafeWrite and then refreshes LastUpdated from the file on disk.
// The original note says this is safe to call during a filters update because
// a new file is created and then renamed, so file descriptors opened on the
// old filter file remain valid - presumably that is what file.SafeWrite does.
// LastUpdated is refreshed even if writing failed; the write error is returned.
func (f *filter) save() error {
	path := f.Path()
	log.Printf("Saving filter %d contents to: %s", f.ID, path)

	writeErr := file.SafeWrite(path, f.Data)

	// update LastUpdated field after saving the file
	f.LastUpdated = f.LastTimeUpdated()

	return writeErr
}
func (filter *filter) saveAndBackupOld() error {
filterFilePath := filter.Path()
2019-10-21 19:49:56 +03:00
err := os.Rename(filterFilePath, filterFilePath+".old")
if err != nil && !os.IsNotExist(err) {
2019-10-21 19:49:56 +03:00
return err
}
return filter.save()
}
// loads filter contents from the file in dataDir
func (filter *filter) load() error {
filterFilePath := filter.Path()
2019-02-07 18:24:12 +03:00
log.Tracef("Loading filter %d contents to: %s", filter.ID, filterFilePath)
if _, err := os.Stat(filterFilePath); os.IsNotExist(err) {
// do nothing, file doesn't exist
return err
}
filterFileContents, err := ioutil.ReadFile(filterFilePath)
if err != nil {
return err
}
2019-02-07 18:24:12 +03:00
log.Tracef("File %s, id %d, length %d", filterFilePath, filter.ID, len(filterFileContents))
rulesCount, _ := parseFilterContents(filterFileContents)
filter.RulesCount = rulesCount
filter.Data = nil
filter.checksum = crc32.ChecksumIEEE(filterFileContents)
filter.LastUpdated = filter.LastTimeUpdated()
return nil
}
// unload drops the filter data kept in memory and resets the rules counter.
func (f *filter) unload() {
	f.RulesCount = 0
	f.Data = nil
}
// Path returns the location of the file with the filter contents:
// "<data dir>/<filterDir>/<ID>.txt".
func (f *filter) Path() string {
	fileName := strconv.FormatInt(f.ID, 10) + ".txt"
	return filepath.Join(config.getDataDir(), filterDir, fileName)
}
// LastTimeUpdated returns the time when the filter was last time updated
func (filter *filter) LastTimeUpdated() time.Time {
filterFilePath := filter.Path()
2019-03-15 16:02:48 +03:00
s, err := os.Stat(filterFilePath)
if os.IsNotExist(err) {
// if the filter file does not exist, return 0001-01-01
return time.Time{}
}
if err != nil {
// if the filter file does not exist, return 0001-01-01
return time.Time{}
}
// filter file modified time
return s.ModTime()
}
func enableFilters(async bool) {
var filters map[int]string
if config.DNS.FilteringEnabled {
// convert array of filters
filters = make(map[int]string)
userFilter := userFilter()
filters[int(userFilter.ID)] = string(userFilter.Data)
for _, filter := range config.Filters {
if !filter.Enabled {
continue
}
filters[int(filter.ID)] = filter.Path()
}
}
Merge: * use upstream servers directly for the internal DNS resolver Close #1212 * Server.Start(config *ServerConfig) -> Start() + Server.Prepare(config *ServerConfig) + Server.Resolve(host string) + Server.Exchange() * rDNS: use internal DNS resolver - clients: fix race in WriteDiskConfig() - fix race: move 'clients' object from 'configuration' to 'HomeContext' Go race detector didn't like our 'clients' object in 'configuration'. + add AGH startup test . Create a configuration file . Start AGH instance . Check Web server . Check DNS server . Wait until the filters are downloaded . Stop and cleanup * move module objects from config.* to Context.* * don't call log.SetLevel() if not necessary This helps to avoid Go race detector's warning * ci.sh: 'make' and then run tests Squashed commit of the following: commit 86500c7f749307f37af4cc8c2a1066f679d0cfad Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 18:08:53 2019 +0300 minor commit 6e6abb9dca3cd250c458bec23aa30d2250a9eb40 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 18:08:31 2019 +0300 * ci.sh: 'make' and then run tests commit 114192eefea6800e565ba9ab238202c006516c27 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 17:50:04 2019 +0300 fix commit d426deea7f02cdfd4c7217a38c59e51251956a0f Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 17:46:33 2019 +0300 tests commit 7b350edf03027895b4e43dee908d0155a9b0ac9b Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 15:56:12 2019 +0300 fix test commit 2f5f116873bbbfdd4bb7f82a596f9e1f5c2bcfd8 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 15:48:56 2019 +0300 fix tests commit 3fbdc77f9c34726e2295185279444983652d559e Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 15:45:00 2019 +0300 linter commit 9da0b6965a2b6863bcd552fa83a4de2866600bb8 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 15:33:23 2019 +0300 * config.dnsctx.whois -> Context.whois commit c71ebdbdf6efd88c877b2f243c69d3bc00a997d7 
Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 15:31:08 2019 +0300 * don't call log.SetLevel() if not necessary This helps to avoid Go race detector's warning commit 0f250220133cefdcb0843a50000cb932802b8324 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 15:28:19 2019 +0300 * rdns: refactor commit c460d8c9414940dac852e390b6c1b4d4fb38dff9 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 14:08:08 2019 +0300 Revert: * stats: serialize access to 'limit' Use 'conf *Config' and update it atomically, as in querylog module. (Note: Race detector still doesn't like it) commit 488bcb884971276de0d5629384b29e22c59ee7e6 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 13:50:23 2019 +0300 * config.dnsFilter -> Context.dnsFilter commit 86c0a6827a450414b50acec7ebfc5220d13b81e4 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 13:45:05 2019 +0300 * config.dnsServer -> Context.dnsServer commit ee35ef095ccaabc89e3de0ef52c9b5ed56b36873 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 13:42:10 2019 +0300 * config.dhcpServer -> Context.dhcpServer commit 1537001cd211099d5fad01696c0b806ae5d257b1 Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 13:39:45 2019 +0300 * config.queryLog -> Context.queryLog commit e5955fe4ff1ef6f41763461b37b502ea25a3d04c Author: Simon Zolin <s.zolin@adguard.com> Date: Tue Dec 10 13:03:18 2019 +0300 * config.httpsServer -> Context.httpsServer commit 6153c10a9ac173e159d1f05e0db1512579b9203c Author: Simon Zolin <s.zolin@adguard.com> Date: Mon Dec 9 20:12:24 2019 +0300 * config.httpServer -> Context.httpServer commit abd021fb94039015cd45c97614e8b78d4694f956 Author: Simon Zolin <s.zolin@adguard.com> Date: Mon Dec 9 20:08:05 2019 +0300 * stats: serialize access to 'limit' commit 38c2decfd87c712100edcabe62a6d4518719cb53 Author: Simon Zolin <s.zolin@adguard.com> Date: Mon Dec 9 19:57:04 2019 +0300 * config.stats -> Context.stats commit 6caf8965ad44db9dce9a7a5103aa8fa305ad9a06 Author: Simon Zolin 
<s.zolin@adguard.com> Date: Mon Dec 9 19:45:23 2019 +0300 fix Restart() ... and 6 more commits
2019-12-11 12:38:58 +03:00
_ = Context.dnsFilter.SetFilters(filters, async)
}