Pull request: querylog: imp perf

Merge in DNS/adguard-home from contains-fold to master

Squashed commit of the following:

commit 45c79b4b7618c8f3108766cc776b5bd3f0571761
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Wed May 19 21:26:09 2021 +0300

    querylog: imp perf
This commit is contained in:
Ainar Garipov 2021-05-20 13:42:35 +03:00
parent 6f7fd33afd
commit 21972e49cb
2 changed files with 158 additions and 13 deletions

View file

@ -2,6 +2,8 @@ package querylog
import ( import (
"strings" "strings"
"unicode"
"unicode/utf8"
"github.com/AdguardTeam/AdGuardHome/internal/dnsfilter" "github.com/AdguardTeam/AdGuardHome/internal/dnsfilter"
) )
@ -63,6 +65,37 @@ func (c *searchCriterion) ctDomainOrClientCaseStrict(
strings.EqualFold(name, term) strings.EqualFold(name, term)
} }
// containsFold reports whether s contains substr, ignoring letter case.
//
// Windows of s are compared to substr with strings.EqualFold, so matching
// follows simple Unicode case folding.  Each window is exactly len(substr)
// bytes long, which means that a match is only reported when the matching
// part of s occupies the same number of bytes as substr.  Fold pairs whose
// UTF-8 encodings differ in size, for example "k" and the Kelvin sign
// U+212A, are not matched against each other.
//
// An empty substr is contained in every s, including the empty one.
//
// TODO(a.garipov): Move to aghstrings if needed elsewhere.
func containsFold(s, substr string) (ok bool) {
	sLen, substrLen := len(s), len(substr)
	switch {
	case substrLen == 0:
		return true
	case sLen < substrLen:
		return false
	case sLen == substrLen:
		return strings.EqualFold(s, substr)
	}

	first, _ := utf8.DecodeRuneInString(substr)

	// isFirst reports whether r equals the first rune of substr under simple
	// case folding.  unicode.SimpleFold only returns the next rune of the
	// folding orbit, and some orbits have more than two members, such as
	// 'k' -> U+212A -> 'K' -> 'k'.  Checking a single fold would skip 'K'
	// when looking for 'k', so walk the orbit until it wraps around.
	isFirst := func(r rune) (eq bool) {
		if r == first {
			return true
		}

		for f := unicode.SimpleFold(first); f != first; f = unicode.SimpleFold(f) {
			if r == f {
				return true
			}
		}

		return false
	}

	for len(s) >= substrLen {
		if strings.EqualFold(s[:substrLen], substr) {
			return true
		}

		// Jump to the next rune that could start a match.  The search begins
		// at the second byte so that the current position is not found again.
		i := strings.IndexFunc(s[1:], isFirst)
		if i == -1 {
			return false
		}

		s = s[1+i:]
	}

	return false
}
func (c *searchCriterion) ctDomainOrClientCaseNonStrict( func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
term string, term string,
clientID string, clientID string,
@ -70,19 +103,10 @@ func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
host string, host string,
ip string, ip string,
) (ok bool) { ) (ok bool) {
// TODO(a.garipov): Write a performant, case-insensitive version of return containsFold(clientID, term) ||
// strings.Contains instead of generating garbage. Or, perhaps in the containsFold(host, term) ||
// future, use a locale-appropriate matcher from golang.org/x/text. containsFold(ip, term) ||
clientID = strings.ToLower(clientID) containsFold(name, term)
host = strings.ToLower(host)
ip = strings.ToLower(ip)
name = strings.ToLower(name)
term = strings.ToLower(term)
return strings.Contains(clientID, term) ||
strings.Contains(host, term) ||
strings.Contains(ip, term) ||
strings.Contains(name, term)
} }
// quickMatch quickly checks if the line matches the given search criterion. // quickMatch quickly checks if the line matches the given search criterion.

View file

@ -0,0 +1,121 @@
package querylog
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
func TestContainsFold(t *testing.T) {
testCases := []struct {
name string
inS string
inSubstr string
want bool
}{{
name: "empty",
inS: "",
inSubstr: "",
want: true,
}, {
name: "shorter",
inS: "a",
inSubstr: "abc",
want: false,
}, {
name: "same_len_true",
inS: "abc",
inSubstr: "abc",
want: true,
}, {
name: "same_len_true_fold",
inS: "abc",
inSubstr: "aBc",
want: true,
}, {
name: "same_len_false",
inS: "abc",
inSubstr: "def",
want: false,
}, {
name: "longer_true",
inS: "abcdedef",
inSubstr: "def",
want: true,
}, {
name: "longer_false",
inS: "abcded",
inSubstr: "ghi",
want: false,
}, {
name: "longer_true_fold",
inS: "abcdedef",
inSubstr: "dEf",
want: true,
}, {
name: "longer_false_fold",
inS: "abcded",
inSubstr: "gHi",
want: false,
}, {
name: "longer_true_cyr_fold",
inS: "абвгдедеё",
inSubstr: "дЕЁ",
want: true,
}, {
name: "longer_false_cyr_fold",
inS: "абвгдедеё",
inSubstr: "жЗИ",
want: false,
}, {
name: "no_letters_true",
inS: "1.2.3.4",
inSubstr: "2.3.4",
want: true,
}, {
name: "no_letters_false",
inS: "1.2.3.4",
inSubstr: "2.3.5",
want: false,
}}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
if tc.want {
assert.True(t, containsFold(tc.inS, tc.inSubstr))
} else {
assert.False(t, containsFold(tc.inS, tc.inSubstr))
}
})
}
}
// sink is a package-level variable that the benchmarks assign their results
// to; presumably this keeps the compiler from optimizing the benchmarked
// calls away.
var sink bool
// BenchmarkContainsFold measures containsFold against a baseline built from
// strings.ToLower and strings.Contains, using the same input string and
// substring for both sub-benchmarks and reporting allocations for each.
func BenchmarkContainsFold(b *testing.B) {
// The only case-insensitive occurrence of substr is at the very end of s.
const s = "aaahBbBhccchDDDeEehFfFhGGGhHhh"
const substr = "HHH"
// Compare our implementation of containsFold against the naive approach
// of calling strings.ToLower and strings.Contains.
b.Run("containsfold", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
sink = containsFold(s, substr)
}
// Both implementations are expected to find the substring.
assert.True(b, sink)
})
b.Run("tolower_contains", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
sink = strings.Contains(strings.ToLower(s), strings.ToLower(substr))
}
// Both implementations are expected to find the substring.
assert.True(b, sink)
})
}