AdGuardHome/internal/querylog/search.go
Stanislav Chzhen 18acdf9b09 Pull request 1809: 4299-querylog-stats-clients-api
Merge in DNS/adguard-home from 4299-querylog-stats-clients-api to master

Squashed commit of the following:

commit 066100a7869d7572c4ae65b3c7b1487ac50baf15
Merge: 95bc00c0 5da77514
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Fri Apr 14 13:57:30 2023 +0300

    Merge branch 'master' into 4299-querylog-stats-clients-api

commit 95bc00c0b3d05b262ee0b90be9757e61cac0778c
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 13 11:48:39 2023 +0300

    all: fix typo

commit 4b868da48f0c976d204346e40ba948803be6397f
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 13 11:42:52 2023 +0300

    all: fix text label

commit 7a3ba5c7f688bd53cf761b5e8e614fbe251bd006
Merge: 315256e3 6c8d89a4
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 13 11:34:59 2023 +0300

    Merge branch 'master' into 4299-querylog-stats-clients-api

commit 315256e3f3861b5116962f7c47384b7c72e41813
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Tue Apr 11 19:07:18 2023 +0300

    all: ignore search, unit

commit 28c6ffec9558e7c38d7bd12055eabddb8f5675c2
Author: Artem Krisanov <a.krisanov@adguard.com>
Date:   Tue Apr 11 15:08:35 2023 +0300

    Added 'Protection' and 'Query Log and statistics' sections to client settings. Added checkboxes to ignore client in (query log/statistics)

commit 2657bd2b820d8b2b3d71d23e4545c867b9ae6cdf
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Mon Apr 10 17:28:59 2023 +0300

    all: add todo

commit e151fcbc0c36d8e6a5c091fbf374bf0e35804699
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Mon Apr 10 15:15:46 2023 +0300

    openapi: imp docs

commit 31875cbbd1bd09a73baa3636d0cc242b5ac35059
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Mon Apr 10 13:02:31 2023 +0300

    all: add querylog stats client ignore api
2023-04-14 15:25:04 +03:00

301 lines
7.3 KiB
Go

package querylog
import (
"io"
"time"
"github.com/AdguardTeam/golibs/log"
"golang.org/x/exp/slices"
)
// client finds the client info, if any, by its ClientID and IP address.  The
// cache is consulted first; on a miss the lookup is delegated to l.findClient
// with the non-empty identifiers, ClientID first.
//
// The IP address is used regardless of whether IP anonymization is enabled
// now, because the anonymization could have been disabled in the past, and
// client will try to find those records as well.
//
// cache may be nil, in which case every call performs a full lookup and
// nothing is stored.  On a lookup error c is nil and nothing is cached.
func (l *queryLog) client(clientID, ip string, cache clientCache) (c *Client, err error) {
	cck := clientCacheKey{clientID: clientID, ip: ip}

	// Reading from a nil map is safe, so no nil check is needed here.
	if cached, ok := cache[cck]; ok {
		return cached, nil
	}

	var ids []string
	if clientID != "" {
		ids = append(ids, clientID)
	}

	if ip != "" {
		ids = append(ids, ip)
	}

	c, err = l.findClient(ids)
	if err != nil {
		return nil, err
	}

	// Cache all results, including negative ones, to prevent excessive and
	// expensive client searching.  Writing to a nil map panics, so skip the
	// store when the caller has not provided a cache.
	if cache != nil {
		cache[cck] = c
	}

	return c, nil
}
// searchMemory collects the records from the in-memory buffer that match
// params, walking the buffer from its last element to its first, i.e. from
// newer to older.  cache is handed to l.client for every record.  total is the
// length of the buffer at the moment of searching.
//
// The buffer lock is held for the whole duration of the call.
func (l *queryLog) searchMemory(params *searchParams, cache clientCache) (entries []*logEntry, total int) {
	l.bufferLock.Lock()
	defer l.bufferLock.Unlock()

	for idx := len(l.buffer); idx > 0; idx-- {
		// Only the client field is written below, so a shallow clone is
		// sufficient to keep the buffered record untouched.
		rec := l.buffer[idx-1].shallowClone()

		cli, err := l.client(rec.ClientID, rec.IP.String(), cache)
		rec.client = cli
		if err != nil {
			// A failed enrichment is only logged; the record still takes
			// part in matching.
			log.Error(
				"querylog: enriching memory record at time %s for client %q (clientid %q): %s",
				rec.Time,
				rec.IP,
				rec.ClientID,
				err,
			)
		}

		if !params.match(rec) {
			continue
		}

		entries = append(entries, rec)
	}

	return entries, len(l.buffer)
}
// search looks up log entries using params, first in the in-memory buffer and
// then in the log files, sharing one client cache between the two lookups.
// It returns the found entries and the time of the oldest one among them.
//
// If params.limit is zero, an empty non-nil slice and a zero time are
// returned without searching.  If params.offset skips past all collected
// entries, the result is likewise an empty slice and a zero time.
func (l *queryLog) search(params *searchParams) (entries []*logEntry, oldest time.Time) {
	begin := time.Now()

	if params.limit == 0 {
		return []*logEntry{}, time.Time{}
	}

	cache := clientCache{}

	fromMemory, bufLen := l.searchMemory(params, cache)
	log.Debug("querylog: got %d entries from memory", len(fromMemory))

	fromFiles, oldest, scanned := l.searchFiles(params, cache)
	log.Debug("querylog: got %d entries from files", len(fromFiles))

	total := bufLen + scanned
	maxLen := params.offset + params.limit

	// Merge both sources into a single collection, memory entries first, and
	// drop everything beyond the requested window.
	entries = append(fromMemory, fromFiles...)
	if maxLen < len(entries) {
		entries = entries[:maxLen]
	}

	// Resort entries on start time to partially mitigate query log looking
	// weird on the frontend.
	//
	// See https://github.com/AdguardTeam/AdGuardHome/issues/2293.
	slices.SortStableFunc(entries, func(a, b *logEntry) (sortsBefore bool) {
		return a.Time.After(b.Time)
	})

	switch {
	case params.offset <= 0:
		// No offset requested, keep the collection as it is.
	case len(entries) > params.offset:
		entries = entries[params.offset:]
	default:
		entries = make([]*logEntry, 0)
		oldest = time.Time{}
	}

	if n := len(entries); n > 0 {
		// Update oldest after merging in the memory buffer.
		oldest = entries[n-1].Time
	}

	log.Debug(
		"querylog: prepared data (%d/%d) older than %s in %s",
		len(entries),
		total,
		params.olderThan,
		time.Since(begin),
	)

	return entries, oldest
}
// searchFiles looks up log records from all log files, i.e. the rotated file
// l.logFile + ".1" and l.logFile itself.  cache is passed on to
// l.readNextEntry for client lookups.
//
// searchFiles does not scan more than params.maxFileScanEntries records, so
// callers may need to call it several times to get all results.  A zero or
// negative params.maxFileScanEntries disables that limit.  Scanning also stops
// once params.offset + params.limit matching entries have been collected or
// the end of the files has been reached.
//
// oldest and total are the time of the oldest processed entry and the total
// number of processed entries, including discarded ones, correspondingly.
// oldest is the zero time when the scan reached the end of the files.  When
// the reader cannot be opened or positioned, the problem is logged and empty
// results are returned.
func (l *queryLog) searchFiles(
	params *searchParams,
	cache clientCache,
) (entries []*logEntry, oldest time.Time, total int) {
	files := []string{
		l.logFile + ".1",
		l.logFile,
	}

	r, err := NewQLogReader(files)
	if err != nil {
		log.Error("querylog: opening qlog reader: %s", err)

		return entries, oldest, 0
	}

	defer func() {
		// Report the error returned by Close itself.  The outer err belongs
		// to the seek and read operations and says nothing about closing.
		if closeErr := r.Close(); closeErr != nil {
			log.Error("querylog: closing file: %s", closeErr)
		}
	}()

	if params.olderThan.IsZero() {
		err = r.SeekStart()
	} else {
		err = r.seekTS(params.olderThan.UnixNano())
		if err == nil {
			// Read to the next record, because we only need the one that goes
			// after it.
			_, err = r.ReadNext()
		}
	}

	if err != nil {
		log.Debug("querylog: cannot seek to %s: %s", params.olderThan, err)

		return entries, oldest, 0
	}

	totalLimit := params.offset + params.limit
	oldestNano := int64(0)

	// By default, we do not scan more than maxFileScanEntries at once.  The
	// idea is to make search calls faster so that the UI could handle it and
	// show something quicker.  This behavior can be overridden if
	// maxFileScanEntries is set to zero or a negative value.
	for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 {
		var e *logEntry
		var ts int64

		e, ts, err = l.readNextEntry(r, params, cache)
		if err != nil {
			if err == io.EOF {
				// Nothing is left to read, so there is no older entry to
				// report.
				oldestNano = 0

				break
			}

			// Any other read error is logged, and the record is still counted
			// as processed below.
			log.Error("querylog: reading next entry: %s", err)
		}

		oldestNano = ts
		total++

		if e != nil {
			entries = append(entries, e)
			if len(entries) == totalLimit {
				break
			}
		}
	}

	if oldestNano != 0 {
		oldest = time.Unix(0, oldestNano)
	}

	return entries, oldest, total
}
// quickMatchClientFinder is a wrapper around the usual client finding function
// to make it easier to use with quick matches.  It binds a client cache to the
// lookup function so that its findClient method only needs the ClientID and
// the IP address.
type quickMatchClientFinder struct {
// client is the lookup function that findClient delegates to.
client func(clientID, ip string, cache clientCache) (c *Client, err error)
// cache is passed as the cache argument on every call to client.
cache clientCache
}
// findClient looks the client up through f.client, supplying f.cache as the
// cache.  A lookup error is logged rather than returned; c is whatever
// f.client returned in either case.
func (f quickMatchClientFinder) findClient(clientID, ip string) (c *Client) {
	found, err := f.client(clientID, ip, f.cache)
	if err == nil {
		return found
	}

	log.Error(
		"querylog: enriching file record for quick search: for client %q (clientid %q): %s",
		ip,
		clientID,
		err,
	)

	return found
}
// readNextEntry reads the next log entry from r and checks if it matches the
// search criteria.  cache is used for all client lookups made along the way.
//
// e is nil if the entry doesn't match the search criteria, if its host is
// ignored according to l.isIgnored, or if its client has IgnoreQueryLog set.
// ts is the timestamp of the processed entry in all of those cases.  err is
// non-nil only when reading from r fails, in which case e is nil and ts is
// zero.
func (l *queryLog) readNextEntry(
	r *QLogReader,
	params *searchParams,
	cache clientCache,
) (e *logEntry, ts int64, err error) {
	var line string
	line, err = r.ReadNext()
	if err != nil {
		return nil, 0, err
	}

	clientFinder := quickMatchClientFinder{
		client: l.client,
		cache:  cache,
	}

	// Check the raw line first so that records that cannot match are
	// discarded without being decoded.
	if !params.quickMatch(line, clientFinder.findClient) {
		ts = readQLogTimestamp(line)

		return nil, ts, nil
	}

	e = &logEntry{}
	decodeLogEntry(e, line)

	// Take the timestamp as soon as the entry is decoded, so that every
	// return below reports it.  Returning zero for an ignored entry would
	// make the caller lose track of the oldest processed record.
	ts = e.Time.UnixNano()

	if l.isIgnored(e.QHost) {
		return nil, ts, nil
	}

	e.client, err = l.client(e.ClientID, e.IP.String(), cache)
	if err != nil {
		log.Error(
			"querylog: enriching file record at time %s for client %q (clientid %q): %s",
			e.Time,
			e.IP,
			e.ClientID,
			err,
		)

		// Go on and try to match anyway.
	}

	if e.client != nil && e.client.IgnoreQueryLog {
		return nil, ts, nil
	}

	if !params.match(e) {
		return nil, ts, nil
	}

	return e, ts, nil
}