diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go index 5b4c66fc..b40d6a38 100644 --- a/dnsfilter/dnsfilter.go +++ b/dnsfilter/dnsfilter.go @@ -67,6 +67,10 @@ type rule struct { // user-supplied data listID uint32 + // suffix matching + isSuffix bool + suffix string + // compiled regexp compiled *regexp.Regexp @@ -387,12 +391,21 @@ func (rule *rule) extractShortcut() { func (rule *rule) compile() error { rule.RLock() - isCompiled := rule.compiled != nil + isCompiled := rule.isSuffix || rule.compiled != nil rule.RUnlock() if isCompiled { return nil } + isSuffix, suffix := getSuffix(rule.text) + if isSuffix { + rule.Lock() + rule.isSuffix = isSuffix + rule.suffix = suffix + rule.Unlock() + return nil + } + expr, err := ruleToRegexp(rule.text) if err != nil { return err @@ -417,7 +430,16 @@ func (rule *rule) match(host string) (Result, error) { return res, err } rule.RLock() - matched := rule.compiled.MatchString(host) + matched := false + if rule.isSuffix { + if host == rule.suffix { + matched = true + } else if strings.HasSuffix(host, "."+rule.suffix) { + matched = true + } + } else { + matched = rule.compiled.MatchString(host) + } rule.RUnlock() if matched { res.Reason = FilteredBlackList diff --git a/dnsfilter/dnsfilter_test.go b/dnsfilter/dnsfilter_test.go index 61d0a4ae..19ae1c77 100644 --- a/dnsfilter/dnsfilter_test.go +++ b/dnsfilter/dnsfilter_test.go @@ -195,7 +195,7 @@ func TestRuleToRegexp(t *testing.T) { {"/doubleclick/", "doubleclick", nil}, {"/", "", ErrInvalidSyntax}, {`|double*?.+[]|(){}#$\|`, `^double.*\?\.\+\[\]\|\(\)\{\}\#\$\\$`, nil}, - {`||doubleclick.net^`, `^([a-z0-9-_.]+\.)?doubleclick\.net([^ a-zA-Z0-9.%]|$)`, nil}, + {`||doubleclick.net^`, `(?:^|\.)doubleclick\.net$`, nil}, } for _, testcase := range tests { converted, err := ruleToRegexp(testcase.rule) @@ -208,6 +208,38 @@ func TestRuleToRegexp(t *testing.T) { } } +func TestSuffixRule(t *testing.T) { + for _, testcase := range []struct { + rule string + isSuffix bool + suffix string + }{ + {`||doubleclick.net^`, true, `doubleclick.net`}, // entire string or subdomain match + {`||doubleclick.net|`, true, `doubleclick.net`}, // entire string or subdomain match + {`|doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`|*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`||*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`||*doubleclick.net|`, false, ``}, // TODO: ends with doubleclick.net + {`||*doublec*lick.net^`, false, ``}, // has a wildcard inside, has to be regexp + {`||*doublec|lick.net^`, false, ``}, // has a special symbol inside, has to be regexp + {`/abracadabra/`, false, ``}, // regexp, not anchored + {`/abracadabra$/`, false, ``}, // TODO: simplify simple suffix regexes + } { + isSuffix, suffix := getSuffix(testcase.rule) + if testcase.isSuffix != isSuffix { + t.Errorf("Results do not match for \"%s\": got %v expected %v", testcase.rule, isSuffix, testcase.isSuffix) + continue + } + if testcase.isSuffix && testcase.suffix != suffix { + t.Errorf("Result suffix does not match for \"%s\": got \"%s\" expected \"%s\"", testcase.rule, suffix, testcase.suffix) + continue + } + // trace("\"%s\": %v: %s", testcase.rule, isSuffix, suffix) + } +} + // // helper functions // diff --git a/dnsfilter/rule_to_regexp.go b/dnsfilter/rule_to_regexp.go index 87fe3792..79c0320d 100644 --- a/dnsfilter/rule_to_regexp.go +++ b/dnsfilter/rule_to_regexp.go @@ -5,8 +5,8 @@ import ( ) func ruleToRegexp(rule string) (string, error) { - const hostStart = "^([a-z0-9-_.]+\\.)?" - const hostEnd = "([^ a-zA-Z0-9.%]|$)" + const hostStart = `(?:^|\.)` + const hostEnd = `$` // empty or short rule -- do nothing if !isValidRule(rule) { @@ -49,3 +49,38 @@ func ruleToRegexp(rule string) (string, error) { return sb.String(), nil } + +// handle suffix rule ||example.com^ -- either entire string is example.com or *.example.com +func getSuffix(rule string) (bool, string) { + // if starts with / and ends with /, it's already a regexp + // TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe? + if rule[0] == '/' && rule[len(rule)-1] == '/' { + return false, "" + } + + // must start with || + if rule[0] != '|' || rule[1] != '|' { + return false, "" + } + rule = rule[2:] + + // suffix rule must end with ^ or | + lastChar := rule[len(rule)-1] + if lastChar != '^' && lastChar != '|' { + return false, "" + } + // last char was checked, eat it + rule = rule[:len(rule)-1] + + // check that it doesn't have any special characters inside + for _, r := range rule { + switch r { + case '|': + return false, "" + case '*': + return false, "" + } + } + + return true, rule +}