From 134d9275bba7de7d1550412310bc275c52bb340e Mon Sep 17 00:00:00 2001
From: Simon Zolin <s.zolin@adguard.com>
Date: Thu, 4 Jul 2019 14:00:20 +0300
Subject: [PATCH] * use urlfilter v0.4.0

Now we pass filtering rules to urlfilter as filter file names,
 rather than as a list of rule strings.
(Note: user rules are still passed as a list of rule strings).

As a result, we don't store the contents of filter files in memory.
---
 dnsfilter/dnsfilter.go   | 48 +++++++++++++++++++++++++++-------------
 dnsforward/dnsforward.go |  6 ++++-
 go.mod                   |  4 ++--
 go.sum                   |  4 ++++
 home/dns.go              |  4 ++--
 home/filter.go           |  7 ++++--
 6 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go
index 9d3ca872..10146558 100644
--- a/dnsfilter/dnsfilter.go
+++ b/dnsfilter/dnsfilter.go
@@ -48,13 +48,12 @@ type RequestFilteringSettings struct {
 
 // Config allows you to configure DNS filtering with New() or just change variables directly.
 type Config struct {
-	FilteringTempFilename string `yaml:"filtering_temp_filename"` // temporary file for storing unused filtering rules
-	ParentalSensitivity   int    `yaml:"parental_sensitivity"`    // must be either 3, 10, 13 or 17
-	ParentalEnabled       bool   `yaml:"parental_enabled"`
-	UsePlainHTTP          bool   `yaml:"-"` // use plain HTTP for requests to parental and safe browsing servers
-	SafeSearchEnabled     bool   `yaml:"safesearch_enabled"`
-	SafeBrowsingEnabled   bool   `yaml:"safebrowsing_enabled"`
-	ResolverAddress       string // DNS server address
+	ParentalSensitivity int    `yaml:"parental_sensitivity"` // must be either 3, 10, 13 or 17
+	ParentalEnabled     bool   `yaml:"parental_enabled"`
+	UsePlainHTTP        bool   `yaml:"-"` // use plain HTTP for requests to parental and safe browsing servers
+	SafeSearchEnabled   bool   `yaml:"safesearch_enabled"`
+	SafeBrowsingEnabled bool   `yaml:"safebrowsing_enabled"`
+	ResolverAddress     string // DNS server address
 
 	// Filtering callback function
 	FilterHandler func(clientAddr string, settings *RequestFilteringSettings) `yaml:"-"`
@@ -82,7 +81,7 @@ type Stats struct {
 
 // Dnsfilter holds added rules and performs hostname matches against the rules
 type Dnsfilter struct {
-	rulesStorage    *urlfilter.RulesStorage
+	rulesStorage    *urlfilter.RuleStorage
 	filteringEngine *urlfilter.DNSEngine
 
 	// HTTP lookups for safebrowsing and parental
@@ -95,8 +94,9 @@ type Dnsfilter struct {
 
 // Filter represents a filter list
 type Filter struct {
-	ID   int64  `json:"id"`         // auto-assigned when filter is added (see nextFilterID), json by default keeps ID uppercase but we need lowercase
-	Data []byte `json:"-" yaml:"-"` // List of rules divided by '\n'
+	ID       int64  `json:"id"`         // auto-assigned when filter is added (see nextFilterID), json by default keeps ID uppercase but we need lowercase
+	Data     []byte `json:"-" yaml:"-"` // List of rules divided by '\n'
+	FilePath string `json:"-" yaml:"-"` // Path to a filtering rules file
 }
 
 //go:generate stringer -type=Reason
@@ -527,13 +527,31 @@ func (d *Dnsfilter) lookupCommon(host string, lookupstats *LookupStats, cache gc
 
 // Initialize urlfilter objects
 func (d *Dnsfilter) initFiltering(filters map[int]string) error {
-	var err error
-	d.rulesStorage, err = urlfilter.NewRuleStorage(d.FilteringTempFilename)
-	if err != nil {
-		return err
+	listArray := []urlfilter.RuleList{}
+	for id, dataOrFilePath := range filters {
+		var list urlfilter.RuleList
+		if id == 0 {
+			list = &urlfilter.StringRuleList{
+				ID:             0,
+				RulesText:      dataOrFilePath,
+				IgnoreCosmetic: false,
+			}
+		} else {
+			var err error
+			list, err = urlfilter.NewFileRuleList(id, dataOrFilePath, false)
+			if err != nil {
+				return fmt.Errorf("urlfilter.NewFileRuleList(): %s: %s", dataOrFilePath, err)
+			}
+		}
+		listArray = append(listArray, list)
 	}
 
-	d.filteringEngine = urlfilter.NewDNSEngine(filters, d.rulesStorage)
+	var err error
+	d.rulesStorage, err = urlfilter.NewRuleStorage(listArray)
+	if err != nil {
+		return fmt.Errorf("urlfilter.NewRuleStorage(): %s", err)
+	}
+	d.filteringEngine = urlfilter.NewDNSEngine(d.rulesStorage)
 	return nil
 }
 
diff --git a/dnsforward/dnsforward.go b/dnsforward/dnsforward.go
index 6db87c7a..47da99bc 100644
--- a/dnsforward/dnsforward.go
+++ b/dnsforward/dnsforward.go
@@ -260,7 +260,11 @@ func (s *Server) initDNSFilter() error {
 	if s.conf.FilteringEnabled {
 		filters = make(map[int]string)
 		for _, f := range s.conf.Filters {
-			filters[int(f.ID)] = string(f.Data)
+			if f.ID == 0 {
+				filters[int(f.ID)] = string(f.Data)
+			} else {
+				filters[int(f.ID)] = f.FilePath
+			}
 		}
 	}
 
diff --git a/go.mod b/go.mod
index a8ec3fa5..2377c2fd 100644
--- a/go.mod
+++ b/go.mod
@@ -5,12 +5,12 @@ go 1.12
 require (
 	github.com/AdguardTeam/dnsproxy v0.15.0
 	github.com/AdguardTeam/golibs v0.1.3
-	github.com/AdguardTeam/urlfilter v0.3.0
+	github.com/AdguardTeam/urlfilter v0.4.0
 	github.com/NYTimes/gziphandler v1.1.1
 	github.com/bluele/gcache v0.0.0-20190203144525-2016d595ccb0
 	github.com/go-test/deep v1.0.1
 	github.com/gobuffalo/packr v1.19.0
-	github.com/joomcode/errorx v0.1.0
+	github.com/joomcode/errorx v0.8.0
 	github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1 // indirect
 	github.com/kardianos/service v0.0.0-20181115005516-4c239ee84e7b
 	github.com/krolaw/dhcp4 v0.0.0-20180925202202-7cead472c414
diff --git a/go.sum b/go.sum
index c6c78fc2..c0dcd8df 100644
--- a/go.sum
+++ b/go.sum
@@ -5,6 +5,8 @@ github.com/AdguardTeam/golibs v0.1.3 h1:hmapdTtMtIk3T8eQDwTOLdqZLGDKNKk9325uC8z1
 github.com/AdguardTeam/golibs v0.1.3/go.mod h1:b0XkhgIcn2TxwX6C5AQMtpIFAgjPehNgxJErWkwA3ko=
 github.com/AdguardTeam/urlfilter v0.3.0 h1:WNd3uZEYWwxylUuA8QS6V5DqHNsVFw3ZD/E2rd5HGpo=
 github.com/AdguardTeam/urlfilter v0.3.0/go.mod h1:9xfZ6R2vB8LlT8G9LxtbNhDsbr/xybUOSwmJvpXhl/c=
+github.com/AdguardTeam/urlfilter v0.4.0 h1:s4EFwI4+gzBdnATNKbuOY53wS2PCHgFZBv1Ixxva6tg=
+github.com/AdguardTeam/urlfilter v0.4.0/go.mod h1:6YehXZ8e0Hx2MvqeQWLFom6IkPinm04tNhO1CkwAxmg=
 github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
 github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c=
 github.com/StackExchange/wmi v0.0.0-20181212234831-e0a55b97c705 h1:UUppSQnhf4Yc6xGxSkoQpPhb7RVzuv5Nb1mwJ5VId9s=
@@ -47,6 +49,8 @@ github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc=
 github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg=
 github.com/joomcode/errorx v0.1.0 h1:QmJMiI1DE1UFje2aI1ZWO/VMT5a32qBoXUclGOt8vsc=
 github.com/joomcode/errorx v0.1.0/go.mod h1:kgco15ekB6cs+4Xjzo7SPeXzx38PbJzBwbnu9qfVNHQ=
+github.com/joomcode/errorx v0.8.0 h1:GhAqPtcYuo1O7TOIbtzEIDzPGQ3SrKJ3tdjXNmUtDNo=
+github.com/joomcode/errorx v0.8.0/go.mod h1:kgco15ekB6cs+4Xjzo7SPeXzx38PbJzBwbnu9qfVNHQ=
 github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1 h1:PJPDf8OUfOK1bb/NeTKd4f1QXZItOX389VN3B6qC8ro=
 github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
 github.com/kardianos/service v0.0.0-20181115005516-4c239ee84e7b h1:vfiqKno48aUndBMjTeWFpCExNnTf2Xnd6d228L4EfTQ=
diff --git a/home/dns.go b/home/dns.go
index def74ecc..0a8b3719 100644
--- a/home/dns.go
+++ b/home/dns.go
@@ -180,8 +180,8 @@ func generateServerConfig() dnsforward.ServerConfig {
 	})
 	for _, filter := range config.Filters {
 		filters = append(filters, dnsfilter.Filter{
-			ID:   filter.ID,
-			Data: filter.Data,
+			ID:       filter.ID,
+			FilePath: filter.Path(),
 		})
 	}
 
diff --git a/home/filter.go b/home/filter.go
index 64da0466..c01ce55b 100644
--- a/home/filter.go
+++ b/home/filter.go
@@ -241,7 +241,7 @@ func refreshFiltersIfNecessary(force bool) int {
 			log.Info("Updated filter #%d.  Rules: %d -> %d",
 				f.ID, f.RulesCount, uf.RulesCount)
 			f.Name = uf.Name
-			f.Data = uf.Data
+			f.Data = nil
 			f.RulesCount = uf.RulesCount
 			f.checksum = uf.checksum
 			updateCount++
@@ -339,6 +339,9 @@ func (filter *filter) update() (bool, error) {
 }
 
 // saves filter contents to the file in dataDir
+// This method is safe to call during a filter update,
+//  because it creates a new file and then renames it,
+//  so file descriptors already opened on the old filter file remain valid.
 func (filter *filter) save() error {
 	filterFilePath := filter.Path()
 	log.Printf("Saving filter %d contents to: %s", filter.ID, filterFilePath)
@@ -369,7 +372,7 @@ func (filter *filter) load() error {
 	rulesCount, _ := parseFilterContents(filterFileContents)
 
 	filter.RulesCount = rulesCount
-	filter.Data = filterFileContents
+	filter.Data = nil
 	filter.checksum = crc32.ChecksumIEEE(filterFileContents)
 	filter.LastUpdated = filter.LastTimeUpdated()