Pull request: 3012 idna search

Merge in DNS/adguard-home from 3012-idna-search to master

Closes #3012.

Squashed commit of the following:

commit 6a9fbfe16860df5db5982a70cfbf040967b6e6ae
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:28:10 2021 +0300

    querylog: add todo

commit 31292ba1aeb9e91ff4f6abae7ffdf806a87cae66
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:21:46 2021 +0300

    querylog: imp docs, code

commit 35757f76837cb8034f6079a351d01aa4706bfea7
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:01:08 2021 +0300

    querylog: fix idn case match

commit eecfc98b6449c5c7c5a23602e80e47002034bc25
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 20:32:00 2021 +0300

    querylog: imp code, docs

commit 8aa6242fe92a9c2daa674b976595b13be96b0cf7
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 20:00:54 2021 +0300

    querylog: sup idn search
This commit is contained in:
Eugene Burkov 2021-06-30 11:04:48 +03:00
parent 232cd381ff
commit 116bedd727
3 changed files with 83 additions and 30 deletions

View file

@ -40,6 +40,7 @@ and this project adheres to
### Changed
- Query log search now supports internationalized domains ([#3012]).
- Internationalized domains are now shown decoded in the query log with the
original encoded version shown in request details ([#3013]).
- When /etc/hosts-type rules have several IPs for one host, all IPs are now
@ -83,6 +84,7 @@ released by then.
[#2443]: https://github.com/AdguardTeam/AdGuardHome/issues/2443
[#2624]: https://github.com/AdguardTeam/AdGuardHome/issues/2624
[#2763]: https://github.com/AdguardTeam/AdGuardHome/issues/2763
[#3012]: https://github.com/AdguardTeam/AdGuardHome/issues/3012
[#3013]: https://github.com/AdguardTeam/AdGuardHome/issues/3013
[#3136]: https://github.com/AdguardTeam/AdGuardHome/issues/3136
[#3162]: https://github.com/AdguardTeam/AdGuardHome/issues/3162

View file

@ -6,11 +6,13 @@ import (
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghstrings"
"github.com/AdguardTeam/golibs/jsonutil"
"github.com/AdguardTeam/golibs/log"
"golang.org/x/net/idna"
)
type qlogConfig struct {
@ -127,25 +129,53 @@ func getDoubleQuotesEnclosedValue(s *string) bool {
}
// parseSearchCriterion parses a search criterion from the query parameter.
func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (ok bool, sc searchCriterion, err error) {
func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (
ok bool,
sc searchCriterion,
err error,
) {
val := q.Get(name)
if len(val) == 0 {
return false, searchCriterion{}, nil
if val == "" {
return false, sc, nil
}
c := searchCriterion{
strict := getDoubleQuotesEnclosedValue(&val)
var asciiVal string
switch ct {
case ctTerm:
// Encode the lowercased value to its ASCII (punycode) form so that
// EqualFold and friends can also match it against IDNAs.
//
// TODO(e.burkov): Make it work with parts of IDNAs somehow.
loweredVal := strings.ToLower(val)
if asciiVal, err = idna.ToASCII(loweredVal); err != nil {
log.Debug("can't convert %q to ascii: %s", val, err)
} else if asciiVal == loweredVal {
// Purge asciiVal to prevent checking the same value
// twice.
asciiVal = ""
}
case ctFilteringStatus:
if !aghstrings.InSlice(filteringStatusValues, val) {
return false, sc, fmt.Errorf("invalid value %s", val)
}
default:
return false, sc, fmt.Errorf(
"invalid criterion type %v: should be one of %v",
ct,
[]criterionType{ctTerm, ctFilteringStatus},
)
}
sc = searchCriterion{
criterionType: ct,
value: val,
}
if getDoubleQuotesEnclosedValue(&c.value) {
c.strict = true
asciiVal: asciiVal,
strict: strict,
}
if ct == ctFilteringStatus && !aghstrings.InSlice(filteringStatusValues, c.value) {
return false, c, fmt.Errorf("invalid value %s", c.value)
}
return true, c, nil
return true, sc, nil
}
// parseSearchParams - parses "searchParams" from the HTTP request's query string
@ -175,15 +205,19 @@ func (l *queryLog) parseSearchParams(r *http.Request) (p *searchParams, err erro
p.maxFileScanEntries = 0
}
paramNames := map[string]criterionType{
"search": ctTerm,
"response_status": ctFilteringStatus,
}
for k, v := range paramNames {
for _, v := range []struct {
urlField string
ct criterionType
}{{
urlField: "search",
ct: ctTerm,
}, {
urlField: "response_status",
ct: ctFilteringStatus,
}} {
var ok bool
var c searchCriterion
ok, c, err = l.parseSearchCriterion(q, k, v)
ok, c, err = l.parseSearchCriterion(q, v.urlField, v.ct)
if err != nil {
return nil, err
}

View file

@ -11,10 +11,9 @@ import (
type criterionType int
const (
// ctTerm is for searching by the domain name, the client's IP
// address, the client's ID or the client's name.
//
// TODO(e.burkov): Make it support IDNA while #3012.
// ctTerm is for searching by the domain name, the client's IP address,
// the client's ID or the client's name. The domain name search
// supports IDNAs.
ctTerm criterionType = iota
// ctFilteringStatus is for searching by the filtering status.
//
@ -47,6 +46,7 @@ var filteringStatusValues = []string{
// searchCriterion is a search criterion that is used to match a record.
type searchCriterion struct {
value string
asciiVal string
criterionType criterionType
// strict, if true, means that the criterion must be applied to the
// whole value rather than the part of it. That is, equality and not
@ -54,14 +54,16 @@ type searchCriterion struct {
strict bool
}
func (c *searchCriterion) ctDomainOrClientCaseStrict(
func ctDomainOrClientCaseStrict(
term string,
asciiTerm string,
clientID string,
name string,
host string,
ip string,
) (ok bool) {
return strings.EqualFold(host, term) ||
(asciiTerm != "" && strings.EqualFold(host, asciiTerm)) ||
strings.EqualFold(clientID, term) ||
strings.EqualFold(ip, term) ||
strings.EqualFold(name, term)
@ -98,8 +100,9 @@ func containsFold(s, substr string) (ok bool) {
return false
}
func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
func ctDomainOrClientCaseNonStrict(
term string,
asciiTerm string,
clientID string,
name string,
host string,
@ -107,6 +110,7 @@ func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
) (ok bool) {
return containsFold(clientID, term) ||
containsFold(host, term) ||
(asciiTerm != "" && containsFold(host, asciiTerm)) ||
containsFold(ip, term) ||
containsFold(name, term)
}
@ -127,10 +131,24 @@ func (c *searchCriterion) quickMatch(line string, findClient quickMatchClientFun
}
if c.strict {
return c.ctDomainOrClientCaseStrict(c.value, clientID, name, host, ip)
return ctDomainOrClientCaseStrict(
c.value,
c.asciiVal,
clientID,
name,
host,
ip,
)
}
return c.ctDomainOrClientCaseNonStrict(c.value, clientID, name, host, ip)
return ctDomainOrClientCaseNonStrict(
c.value,
c.asciiVal,
clientID,
name,
host,
ip,
)
case ctFilteringStatus:
// Go on, as we currently don't do quick matches against
// filtering statuses.
@ -162,12 +180,11 @@ func (c *searchCriterion) ctDomainOrClientCase(e *logEntry) bool {
}
ip := e.IP.String()
term := strings.ToLower(c.value)
if c.strict {
return c.ctDomainOrClientCaseStrict(term, clientID, name, host, ip)
return ctDomainOrClientCaseStrict(c.value, c.asciiVal, clientID, name, host, ip)
}
return c.ctDomainOrClientCaseNonStrict(term, clientID, name, host, ip)
return ctDomainOrClientCaseNonStrict(c.value, c.asciiVal, clientID, name, host, ip)
}
func (c *searchCriterion) ctFilteringStatusCase(res filtering.Result) bool {