AdGuardHome/internal/querylog/searchcriterion.go
Eugene Burkov 116bedd727 Pull request: 3012 idna search
Merge in DNS/adguard-home from 3012-idna-search to master

Closes #3012.

Squashed commit of the following:

commit 6a9fbfe16860df5db5982a70cfbf040967b6e6ae
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:28:10 2021 +0300

    querylog: add todo

commit 31292ba1aeb9e91ff4f6abae7ffdf806a87cae66
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:21:46 2021 +0300

    querylog: imp docs, code

commit 35757f76837cb8034f6079a351d01aa4706bfea7
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:01:08 2021 +0300

    queerylog: fix idn case match

commit eecfc98b6449c5c7c5a23602e80e47002034bc25
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 20:32:00 2021 +0300

    querylog: imp code, docs

commit 8aa6242fe92a9c2daa674b976595b13be96b0cf7
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 20:00:54 2021 +0300

    querylog: sup idn search
2021-06-30 11:04:48 +03:00

240 lines
6.2 KiB
Go

package querylog
import (
"strings"
"unicode"
"unicode/utf8"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
)
// criterionType is the kind of a search criterion.  It selects which matching
// routine a searchCriterion applies to a log record.
type criterionType int
// Supported search criterion kinds.
const (
// ctTerm is for searching by the domain name, the client's IP address,
// the client's ID or the client's name. The domain name search
// supports IDNAs.
ctTerm criterionType = iota
// ctFilteringStatus is for searching by the filtering status.
//
// See (*searchCriterion).ctFilteringStatusCase for details.
ctFilteringStatus
)
// Filtering status values that a ctFilteringStatus criterion may carry.  See
// (*searchCriterion).ctFilteringStatusCase for the exact matching rules.
const (
	// filteringStatusAll matches every entry.
	filteringStatusAll = "all"

	// filteringStatusFiltered matches all kinds of filtering: filtered
	// entries as well as allowlisted and rewritten ones.
	filteringStatusFiltered = "filtered"

	// filteringStatusBlocked matches entries blocked by a blocklist or by
	// the blocked services.
	filteringStatusBlocked = "blocked"

	// filteringStatusBlockedService matches entries blocked by the blocked
	// services only.
	filteringStatusBlockedService = "blocked_services"

	// filteringStatusBlockedSafebrowsing matches entries blocked by
	// safebrowsing.
	filteringStatusBlockedSafebrowsing = "blocked_safebrowsing"

	// filteringStatusBlockedParental matches entries blocked by parental
	// control.
	filteringStatusBlockedParental = "blocked_parental"

	// filteringStatusWhitelisted matches allowlisted entries.
	filteringStatusWhitelisted = "whitelisted"

	// filteringStatusRewritten matches all kinds of rewrites.
	filteringStatusRewritten = "rewritten"

	// filteringStatusSafeSearch matches entries with enforced safe search.
	filteringStatusSafeSearch = "safe_search"

	// filteringStatusProcessed matches entries that are neither blocked
	// (by a blocklist or a blocked service) nor allowlisted.
	filteringStatusProcessed = "processed"
)
// filteringStatusValues lists all possible filtering status values.
var filteringStatusValues = []string{
	filteringStatusAll,
	filteringStatusFiltered,
	filteringStatusBlocked,
	filteringStatusBlockedService,
	filteringStatusBlockedSafebrowsing,
	filteringStatusBlockedParental,
	filteringStatusWhitelisted,
	filteringStatusRewritten,
	filteringStatusSafeSearch,
	filteringStatusProcessed,
}
// searchCriterion is a search criterion that is used to match a record.
type searchCriterion struct {
// value is the search value.  For ctTerm criteria it is the term compared
// against the host, the client ID, the client name, and the IP address.
// For ctFilteringStatus criteria it is compared against the
// filteringStatus* constants.
value string
// asciiVal is an additional form of value that is compared against the
// host only, and only when it is not empty.
//
// NOTE(review): presumably this is the ASCII (Punycode) form of an IDN
// term; confirm against the code that constructs the criterion.
asciiVal string
// criterionType selects the matching routine applied to value.
criterionType criterionType
// strict, if true, means that the criterion must be applied to the
// whole value rather than the part of it. That is, equality and not
// containment.
strict bool
}
// ctDomainOrClientCaseStrict reports whether term is equal, ignoring letter
// case, to host, clientID, ip, or name.  If asciiTerm is not empty, host is
// additionally compared against it.
func ctDomainOrClientCaseStrict(
	term string,
	asciiTerm string,
	clientID string,
	name string,
	host string,
	ip string,
) (ok bool) {
	// The ASCII form of the term is only meaningful for the host, and an
	// empty one must never match an empty host.
	if asciiTerm != "" && strings.EqualFold(host, asciiTerm) {
		return true
	}

	for _, field := range [...]string{host, clientID, ip, name} {
		if strings.EqualFold(field, term) {
			return true
		}
	}

	return false
}
// containsFold reports whether s contains substr, ignoring letter case.  Runes
// are compared under Unicode simple case folding, which is the equivalence
// that strings.EqualFold uses.  An empty substr is contained in any s.
//
// TODO(a.garipov): Move to aghstrings if needed elsewhere.
func containsFold(s, substr string) (ok bool) {
	if substr == "" {
		return true
	}

	// Try every rune boundary of s as a possible start of the match.  Byte
	// lengths cannot be used to align or to bound the comparison, since
	// case-equivalent runes may have different encoded sizes, for example
	// "k" and the Kelvin sign U+212A.
	for i := range s {
		if hasPrefixFold(s[i:], substr) {
			return true
		}
	}

	return false
}

// hasPrefixFold reports whether s begins with prefix, ignoring letter case.
func hasPrefixFold(s, prefix string) (ok bool) {
	for _, want := range prefix {
		got, size := utf8.DecodeRuneInString(s)
		if size == 0 || !equalFoldRune(got, want) {
			// Either s is exhausted before prefix is, or the runes differ.
			return false
		}

		s = s[size:]
	}

	return true
}

// equalFoldRune reports whether a and b are equal under Unicode simple case
// folding.
func equalFoldRune(a, b rune) (ok bool) {
	if a == b {
		return true
	}

	// Walk the whole folding orbit of a, since some runes have more than two
	// equivalent forms, for example 'S', 's', and U+017F.  Checking only the
	// immediate unicode.SimpleFold result would miss some of them.
	for r := unicode.SimpleFold(a); r != a; r = unicode.SimpleFold(r) {
		if r == b {
			return true
		}
	}

	return false
}
// ctDomainOrClientCaseNonStrict reports whether term is contained, ignoring
// letter case, in clientID, host, ip, or name.  If asciiTerm is not empty,
// host is additionally searched for it.
func ctDomainOrClientCaseNonStrict(
	term string,
	asciiTerm string,
	clientID string,
	name string,
	host string,
	ip string,
) (ok bool) {
	for _, field := range [...]string{clientID, host, ip, name} {
		if containsFold(field, term) {
			return true
		}
	}

	// The ASCII form of the term only applies to the host, and an empty one
	// must not be searched for.
	if asciiTerm == "" {
		return false
	}

	return containsFold(host, asciiTerm)
}
// quickMatch quickly checks if the line matches the given search criterion.
// It returns false if the line doesn't match.  The host, the IP address, and
// the client ID are read from line with readJSONValue, and the client name is
// looked up with findClient.  This method is only here for optimisation
// purposes.
func (c *searchCriterion) quickMatch(line string, findClient quickMatchClientFunc) (ok bool) {
	if c.criterionType != ctTerm {
		// Quick matches are currently not done against filtering statuses,
		// so report those, as well as any other type, as possible matches.
		return true
	}

	host := readJSONValue(line, `"QH":"`)
	ip := readJSONValue(line, `"IP":"`)
	clientID := readJSONValue(line, `"CID":"`)

	name := ""
	if cli := findClient(clientID, ip); cli != nil {
		name = cli.Name
	}

	matchTerm := ctDomainOrClientCaseNonStrict
	if c.strict {
		matchTerm = ctDomainOrClientCaseStrict
	}

	return matchTerm(c.value, c.asciiVal, clientID, name, host, ip)
}
// match reports whether the log entry satisfies this search criterion.  A
// criterion of an unknown type matches nothing.
func (c *searchCriterion) match(entry *logEntry) bool {
	if c.criterionType == ctTerm {
		return c.ctDomainOrClientCase(entry)
	}

	if c.criterionType == ctFilteringStatus {
		return c.ctFilteringStatusCase(entry.Result)
	}

	return false
}
// ctDomainOrClientCase reports whether the term of c matches the host, the
// client ID, the client name, or the IP address of e.  The comparison is an
// equality check if c.strict is set and a containment check otherwise.
func (c *searchCriterion) ctDomainOrClientCase(e *logEntry) bool {
	// The name stays empty when the entry has no client attached.
	name := ""
	if cli := e.client; cli != nil {
		name = cli.Name
	}

	matchTerm := ctDomainOrClientCaseNonStrict
	if c.strict {
		matchTerm = ctDomainOrClientCaseStrict
	}

	return matchTerm(c.value, c.asciiVal, e.ClientID, name, e.QHost, e.IP.String())
}
// ctFilteringStatusCase reports whether the filtering result res corresponds
// to the filtering status stored in c.value.  An unknown status matches
// nothing.
func (c *searchCriterion) ctFilteringStatusCase(res filtering.Result) bool {
	isFiltered, reason := res.IsFiltered, res.Reason

	switch c.value {
	case filteringStatusAll:
		return true

	// Statuses that depend on the reason alone.
	case filteringStatusWhitelisted:
		return reason == filtering.NotFilteredAllowList
	case filteringStatusRewritten:
		return reason.In(
			filtering.Rewritten,
			filtering.RewrittenAutoHosts,
			filtering.RewrittenRule,
		)
	case filteringStatusProcessed:
		blockedOrAllowed := reason.In(
			filtering.FilteredBlockList,
			filtering.FilteredBlockedService,
			filtering.NotFilteredAllowList,
		)

		return !blockedOrAllowed

	// Statuses that also take the filtered flag into account.
	case filteringStatusFiltered:
		if isFiltered {
			return true
		}

		return reason.In(
			filtering.NotFilteredAllowList,
			filtering.Rewritten,
			filtering.RewrittenAutoHosts,
			filtering.RewrittenRule,
		)
	case filteringStatusBlocked:
		return isFiltered &&
			reason.In(filtering.FilteredBlockList, filtering.FilteredBlockedService)
	case filteringStatusBlockedService:
		return isFiltered && reason == filtering.FilteredBlockedService
	case filteringStatusBlockedSafebrowsing:
		return isFiltered && reason == filtering.FilteredSafeBrowsing
	case filteringStatusBlockedParental:
		return isFiltered && reason == filtering.FilteredParental
	case filteringStatusSafeSearch:
		return isFiltered && reason == filtering.FilteredSafeSearch
	}

	return false
}