From cb97a254a56e79a5ab14b011f6406bd32331665f Mon Sep 17 00:00:00 2001
From: Eugene Bujak <hmage@hmage.net>
Date: Thu, 4 Oct 2018 13:19:43 +0300
Subject: [PATCH] dnsfilter -- avoid using regexps when simple suffix match is
 enough.

This covers 96.98% of all adguard dns rules.
---
 dnsfilter/dnsfilter.go      | 26 +++++++++++++++++++++++--
 dnsfilter/dnsfilter_test.go | 34 +++++++++++++++++++++++++++++++-
 dnsfilter/rule_to_regexp.go | 39 +++++++++++++++++++++++++++++++++++--
 3 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go
index 5b4c66fc..b40d6a38 100644
--- a/dnsfilter/dnsfilter.go
+++ b/dnsfilter/dnsfilter.go
@@ -67,6 +67,10 @@ type rule struct {
 	// user-supplied data
 	listID uint32
 
+	// suffix matching
+	isSuffix bool
+	suffix   string
+
 	// compiled regexp
 	compiled *regexp.Regexp
 
@@ -387,12 +391,21 @@ func (rule *rule) extractShortcut() {
 
 func (rule *rule) compile() error {
 	rule.RLock()
-	isCompiled := rule.compiled != nil
+	isCompiled := rule.isSuffix || rule.compiled != nil
 	rule.RUnlock()
 	if isCompiled {
 		return nil
 	}
 
+	isSuffix, suffix := getSuffix(rule.text)
+	if isSuffix {
+		rule.Lock()
+		rule.isSuffix = isSuffix
+		rule.suffix = suffix
+		rule.Unlock()
+		return nil
+	}
+
 	expr, err := ruleToRegexp(rule.text)
 	if err != nil {
 		return err
@@ -417,7 +430,16 @@ func (rule *rule) match(host string) (Result, error) {
 		return res, err
 	}
 	rule.RLock()
-	matched := rule.compiled.MatchString(host)
+	matched := false
+	if rule.isSuffix {
+		if host == rule.suffix {
+			matched = true
+		} else if strings.HasSuffix(host, "."+rule.suffix) {
+			matched = true
+		}
+	} else {
+		matched = rule.compiled.MatchString(host)
+	}
 	rule.RUnlock()
 	if matched {
 		res.Reason = FilteredBlackList
diff --git a/dnsfilter/dnsfilter_test.go b/dnsfilter/dnsfilter_test.go
index 61d0a4ae..19ae1c77 100644
--- a/dnsfilter/dnsfilter_test.go
+++ b/dnsfilter/dnsfilter_test.go
@@ -195,7 +195,7 @@ func TestRuleToRegexp(t *testing.T) {
 		{"/doubleclick/", "doubleclick", nil},
 		{"/", "", ErrInvalidSyntax},
 		{`|double*?.+[]|(){}#$\|`, `^double.*\?\.\+\[\]\|\(\)\{\}\#\$\\$`, nil},
-		{`||doubleclick.net^`, `^([a-z0-9-_.]+\.)?doubleclick\.net([^ a-zA-Z0-9.%]|$)`, nil},
+		{`||doubleclick.net^`, `(?:^|\.)doubleclick\.net$`, nil},
 	}
 	for _, testcase := range tests {
 		converted, err := ruleToRegexp(testcase.rule)
@@ -208,6 +208,38 @@ func TestRuleToRegexp(t *testing.T) {
 	}
 }
 
+func TestSuffixRule(t *testing.T) {
+	for _, testcase := range []struct {
+		rule     string
+		isSuffix bool
+		suffix   string
+	}{
+		{`||doubleclick.net^`, true, `doubleclick.net`}, // entire string or subdomain match
+		{`||doubleclick.net|`, true, `doubleclick.net`}, // entire string or subdomain match
+		{`|doubleclick.net^`, false, ``},                // TODO: ends with doubleclick.net
+		{`*doubleclick.net^`, false, ``},                // TODO: ends with doubleclick.net
+		{`doubleclick.net^`, false, ``},                 // TODO: ends with doubleclick.net
+		{`|*doubleclick.net^`, false, ``},               // TODO: ends with doubleclick.net
+		{`||*doubleclick.net^`, false, ``},              // TODO: ends with doubleclick.net
+		{`||*doubleclick.net|`, false, ``},              // TODO: ends with doubleclick.net
+		{`||*doublec*lick.net^`, false, ``},             // has a wildcard inside, has to be regexp
+		{`||*doublec|lick.net^`, false, ``},             // has a special symbol inside, has to be regexp
+		{`/abracadabra/`, false, ``},                    // regexp, not anchored
+		{`/abracadabra$/`, false, ``},                   // TODO: simplify simple suffix regexes
+	} {
+		isSuffix, suffix := getSuffix(testcase.rule)
+		if testcase.isSuffix != isSuffix {
+			t.Errorf("Results do not match for \"%s\": got %v expected %v", testcase.rule, isSuffix, testcase.isSuffix)
+			continue
+		}
+		if testcase.isSuffix && testcase.suffix != suffix {
+			t.Errorf("Result suffix does not match for \"%s\": got \"%s\" expected \"%s\"", testcase.rule, suffix, testcase.suffix)
+			continue
+		}
+		// trace("\"%s\": %v: %s", testcase.rule, isSuffix, suffix)
+	}
+}
+
 //
 // helper functions
 //
diff --git a/dnsfilter/rule_to_regexp.go b/dnsfilter/rule_to_regexp.go
index 87fe3792..79c0320d 100644
--- a/dnsfilter/rule_to_regexp.go
+++ b/dnsfilter/rule_to_regexp.go
@@ -5,8 +5,8 @@ import (
 )
 
 func ruleToRegexp(rule string) (string, error) {
-	const hostStart = "^([a-z0-9-_.]+\\.)?"
-	const hostEnd = "([^ a-zA-Z0-9.%]|$)"
+	const hostStart = `(?:^|\.)`
+	const hostEnd = `$`
 
 	// empty or short rule -- do nothing
 	if !isValidRule(rule) {
@@ -49,3 +49,38 @@ func ruleToRegexp(rule string) (string, error) {
 
 	return sb.String(), nil
 }
+
+// handle suffix rule ||example.com^ -- either entire string is example.com or *.example.com
+func getSuffix(rule string) (bool, string) {
+	// if starts with / and ends with /, it's already a regexp
+	// TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe?
+	if rule[0] == '/' && rule[len(rule)-1] == '/' {
+		return false, ""
+	}
+
+	// must start with ||
+	if rule[0] != '|' || rule[1] != '|' {
+		return false, ""
+	}
+	rule = rule[2:]
+
+	// suffix rule must end with ^ or |
+	lastChar := rule[len(rule)-1]
+	if lastChar != '^' && lastChar != '|' {
+		return false, ""
+	}
+	// last char was checked, eat it
+	rule = rule[:len(rule)-1]
+
+	// check that it doesn't have any special characters inside
+	for _, r := range rule {
+		switch r {
+		case '|':
+			return false, ""
+		case '*':
+			return false, ""
+		}
+	}
+
+	return true, rule
+}