Pull request: 3012 idna search

Merge in DNS/adguard-home from 3012-idna-search to master

Closes #3012.

Squashed commit of the following:

commit 6a9fbfe16860df5db5982a70cfbf040967b6e6ae
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:28:10 2021 +0300

    querylog: add todo

commit 31292ba1aeb9e91ff4f6abae7ffdf806a87cae66
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:21:46 2021 +0300

    querylog: imp docs, code

commit 35757f76837cb8034f6079a351d01aa4706bfea7
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 21:01:08 2021 +0300

    querylog: fix idn case match

commit eecfc98b6449c5c7c5a23602e80e47002034bc25
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 20:32:00 2021 +0300

    querylog: imp code, docs

commit 8aa6242fe92a9c2daa674b976595b13be96b0cf7
Author: Eugene Burkov <e.burkov@adguard.com>
Date:   Tue Jun 29 20:00:54 2021 +0300

    querylog: sup idn search
This commit is contained in:
Eugene Burkov 2021-06-30 11:04:48 +03:00
parent 232cd381ff
commit 116bedd727
3 changed files with 83 additions and 30 deletions

View file

@ -40,6 +40,7 @@ and this project adheres to
### Changed ### Changed
- Query log search now supports internationalized domains ([#3012]).
- Internationalized domains are now shown decoded in the query log with the - Internationalized domains are now shown decoded in the query log with the
original encoded version shown in request details ([#3013]). original encoded version shown in request details ([#3013]).
- When /etc/hosts-type rules have several IPs for one host, all IPs are now - When /etc/hosts-type rules have several IPs for one host, all IPs are now
@ -83,6 +84,7 @@ released by then.
[#2443]: https://github.com/AdguardTeam/AdGuardHome/issues/2443 [#2443]: https://github.com/AdguardTeam/AdGuardHome/issues/2443
[#2624]: https://github.com/AdguardTeam/AdGuardHome/issues/2624 [#2624]: https://github.com/AdguardTeam/AdGuardHome/issues/2624
[#2763]: https://github.com/AdguardTeam/AdGuardHome/issues/2763 [#2763]: https://github.com/AdguardTeam/AdGuardHome/issues/2763
[#3012]: https://github.com/AdguardTeam/AdGuardHome/issues/3012
[#3013]: https://github.com/AdguardTeam/AdGuardHome/issues/3013 [#3013]: https://github.com/AdguardTeam/AdGuardHome/issues/3013
[#3136]: https://github.com/AdguardTeam/AdGuardHome/issues/3136 [#3136]: https://github.com/AdguardTeam/AdGuardHome/issues/3136
[#3162]: https://github.com/AdguardTeam/AdGuardHome/issues/3162 [#3162]: https://github.com/AdguardTeam/AdGuardHome/issues/3162

View file

@ -6,11 +6,13 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/AdguardTeam/AdGuardHome/internal/aghstrings" "github.com/AdguardTeam/AdGuardHome/internal/aghstrings"
"github.com/AdguardTeam/golibs/jsonutil" "github.com/AdguardTeam/golibs/jsonutil"
"github.com/AdguardTeam/golibs/log" "github.com/AdguardTeam/golibs/log"
"golang.org/x/net/idna"
) )
type qlogConfig struct { type qlogConfig struct {
@ -127,25 +129,53 @@ func getDoubleQuotesEnclosedValue(s *string) bool {
} }
// parseSearchCriterion parses a search criterion from the query parameter. // parseSearchCriterion parses a search criterion from the query parameter.
func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (ok bool, sc searchCriterion, err error) { func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (
ok bool,
sc searchCriterion,
err error,
) {
val := q.Get(name) val := q.Get(name)
if len(val) == 0 { if val == "" {
return false, searchCriterion{}, nil return false, sc, nil
} }
c := searchCriterion{ strict := getDoubleQuotesEnclosedValue(&val)
var asciiVal string
switch ct {
case ctTerm:
// Encode the lowercased value to punycode (ASCII) to make EqualFold
// and friends work properly with IDNAs.
//
// TODO(e.burkov): Make it work with parts of IDNAs somehow.
loweredVal := strings.ToLower(val)
if asciiVal, err = idna.ToASCII(loweredVal); err != nil {
log.Debug("can't convert %q to ascii: %s", val, err)
} else if asciiVal == loweredVal {
// Purge asciiVal to prevent checking the same value
// twice.
asciiVal = ""
}
case ctFilteringStatus:
if !aghstrings.InSlice(filteringStatusValues, val) {
return false, sc, fmt.Errorf("invalid value %s", val)
}
default:
return false, sc, fmt.Errorf(
"invalid criterion type %v: should be one of %v",
ct,
[]criterionType{ctTerm, ctFilteringStatus},
)
}
sc = searchCriterion{
criterionType: ct, criterionType: ct,
value: val, value: val,
} asciiVal: asciiVal,
if getDoubleQuotesEnclosedValue(&c.value) { strict: strict,
c.strict = true
} }
if ct == ctFilteringStatus && !aghstrings.InSlice(filteringStatusValues, c.value) { return true, sc, nil
return false, c, fmt.Errorf("invalid value %s", c.value)
}
return true, c, nil
} }
// parseSearchParams - parses "searchParams" from the HTTP request's query string // parseSearchParams - parses "searchParams" from the HTTP request's query string
@ -175,15 +205,19 @@ func (l *queryLog) parseSearchParams(r *http.Request) (p *searchParams, err erro
p.maxFileScanEntries = 0 p.maxFileScanEntries = 0
} }
paramNames := map[string]criterionType{ for _, v := range []struct {
"search": ctTerm, urlField string
"response_status": ctFilteringStatus, ct criterionType
} }{{
urlField: "search",
for k, v := range paramNames { ct: ctTerm,
}, {
urlField: "response_status",
ct: ctFilteringStatus,
}} {
var ok bool var ok bool
var c searchCriterion var c searchCriterion
ok, c, err = l.parseSearchCriterion(q, k, v) ok, c, err = l.parseSearchCriterion(q, v.urlField, v.ct)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View file

@ -11,10 +11,9 @@ import (
type criterionType int type criterionType int
const ( const (
// ctTerm is for searching by the domain name, the client's IP // ctTerm is for searching by the domain name, the client's IP address,
// address, the client's ID or the client's name. // the client's ID or the client's name. The domain name search
// // supports IDNAs.
// TODO(e.burkov): Make it support IDNA while #3012.
ctTerm criterionType = iota ctTerm criterionType = iota
// ctFilteringStatus is for searching by the filtering status. // ctFilteringStatus is for searching by the filtering status.
// //
@ -47,6 +46,7 @@ var filteringStatusValues = []string{
// searchCriterion is a search criterion that is used to match a record. // searchCriterion is a search criterion that is used to match a record.
type searchCriterion struct { type searchCriterion struct {
value string value string
asciiVal string
criterionType criterionType criterionType criterionType
// strict, if true, means that the criterion must be applied to the // strict, if true, means that the criterion must be applied to the
// whole value rather than the part of it. That is, equality and not // whole value rather than the part of it. That is, equality and not
@ -54,14 +54,16 @@ type searchCriterion struct {
strict bool strict bool
} }
func (c *searchCriterion) ctDomainOrClientCaseStrict( func ctDomainOrClientCaseStrict(
term string, term string,
asciiTerm string,
clientID string, clientID string,
name string, name string,
host string, host string,
ip string, ip string,
) (ok bool) { ) (ok bool) {
return strings.EqualFold(host, term) || return strings.EqualFold(host, term) ||
(asciiTerm != "" && strings.EqualFold(host, asciiTerm)) ||
strings.EqualFold(clientID, term) || strings.EqualFold(clientID, term) ||
strings.EqualFold(ip, term) || strings.EqualFold(ip, term) ||
strings.EqualFold(name, term) strings.EqualFold(name, term)
@ -98,8 +100,9 @@ func containsFold(s, substr string) (ok bool) {
return false return false
} }
func (c *searchCriterion) ctDomainOrClientCaseNonStrict( func ctDomainOrClientCaseNonStrict(
term string, term string,
asciiTerm string,
clientID string, clientID string,
name string, name string,
host string, host string,
@ -107,6 +110,7 @@ func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
) (ok bool) { ) (ok bool) {
return containsFold(clientID, term) || return containsFold(clientID, term) ||
containsFold(host, term) || containsFold(host, term) ||
(asciiTerm != "" && containsFold(host, asciiTerm)) ||
containsFold(ip, term) || containsFold(ip, term) ||
containsFold(name, term) containsFold(name, term)
} }
@ -127,10 +131,24 @@ func (c *searchCriterion) quickMatch(line string, findClient quickMatchClientFun
} }
if c.strict { if c.strict {
return c.ctDomainOrClientCaseStrict(c.value, clientID, name, host, ip) return ctDomainOrClientCaseStrict(
c.value,
c.asciiVal,
clientID,
name,
host,
ip,
)
} }
return c.ctDomainOrClientCaseNonStrict(c.value, clientID, name, host, ip) return ctDomainOrClientCaseNonStrict(
c.value,
c.asciiVal,
clientID,
name,
host,
ip,
)
case ctFilteringStatus: case ctFilteringStatus:
// Go on, as we currently don't do quick matches against // Go on, as we currently don't do quick matches against
// filtering statuses. // filtering statuses.
@ -162,12 +180,11 @@ func (c *searchCriterion) ctDomainOrClientCase(e *logEntry) bool {
} }
ip := e.IP.String() ip := e.IP.String()
term := strings.ToLower(c.value)
if c.strict { if c.strict {
return c.ctDomainOrClientCaseStrict(term, clientID, name, host, ip) return ctDomainOrClientCaseStrict(c.value, c.asciiVal, clientID, name, host, ip)
} }
return c.ctDomainOrClientCaseNonStrict(term, clientID, name, host, ip) return ctDomainOrClientCaseNonStrict(c.value, c.asciiVal, clientID, name, host, ip)
} }
func (c *searchCriterion) ctFilteringStatusCase(res filtering.Result) bool { func (c *searchCriterion) ctFilteringStatusCase(res filtering.Result) bool {