From cbc7985e756e40ec6ee290c003ff343f825a89b2 Mon Sep 17 00:00:00 2001 From: Dimitry Kolyshev Date: Wed, 24 May 2023 16:33:15 +0300 Subject: [PATCH] Pull request: querylog imp code Merge in DNS/adguard-home from querylog-imp-code to master Squashed commit of the following: commit a58ad36508a2355b686d314dec51ac0b5e357281 Merge: df5494f2c 941eb1dd7 Author: Dimitry Kolyshev Date: Wed May 24 15:26:55 2023 +0300 Merge remote-tracking branch 'origin/master' into querylog-imp-code commit df5494f2c337736690a3c2a547c2d71858d0378f Author: Dimitry Kolyshev Date: Wed May 24 15:24:43 2023 +0300 querylog: imp code commit 8c3c2b76dd5858e7b107f222c112e9cde2477fb3 Author: Dimitry Kolyshev Date: Wed May 24 12:14:15 2023 +0300 all: lint script commit be04a4decfaf20a1649d32ecaab3c1c6bb205ffd Author: Dimitry Kolyshev Date: Wed May 24 12:03:12 2023 +0300 querylog: imp code commit fe7beacff3a5cfcf2332c4998b9c65820284eaf7 Author: Dimitry Kolyshev Date: Wed May 24 11:57:33 2023 +0300 querylog: imp docs commit 2ae239c57d12524fbc092f582842af2ad726c1d0 Author: Dimitry Kolyshev Date: Wed May 24 11:46:54 2023 +0300 querylog: imp code commit 417216cefbf154fa870f8f43468f35e0e345971f Author: Dimitry Kolyshev Date: Wed May 24 11:25:44 2023 +0300 querylog: imp code commit 514b6ee99113844a4e0dad30dc53703e3220c289 Author: Dimitry Kolyshev Date: Wed May 24 11:14:13 2023 +0300 querylog: imp docs commit 321351a3abb524208daacd5a3a7fbf5f07ab259d Author: Dimitry Kolyshev Date: Mon May 22 16:38:31 2023 +0300 querylog: imp code commit ee91de5c43210b5bc213f933d411adb894d2e586 Author: Dimitry Kolyshev Date: Mon May 22 16:01:32 2023 +0300 querylog: imp code commit 862ff12177fb769d5cb2ec250eaee538dc91d70a Author: Dimitry Kolyshev Date: Mon May 22 15:07:24 2023 +0300 querylog: imp code commit cc62c1c4ae8b813d03ccf51b596ba1ebf44d9a1f Merge: 37ace34e9 24b41100c Author: Dimitry Kolyshev Date: Mon May 22 13:09:10 2023 +0300 Merge remote-tracking branch 'origin/master' into querylog-imp-code commit 
37ace34e91e5189bef6e774db960f40cdaa18270 Author: Dimitry Kolyshev Date: Mon May 22 11:23:08 2023 +0300 querylog: imp code commit 8417815a6349f10b5dbad410ce28aab98bc479fa Author: Dimitry Kolyshev Date: Mon May 22 11:08:29 2023 +0300 querylog: imp docs commit 4e5cde74d25713f78675aa3e18083b4fb5e619f3 Author: Dimitry Kolyshev Date: Fri May 19 16:41:34 2023 +0300 querylog: imp code commit 3494eab7006240f652a0217d305ac916bd6c3c83 Author: Dimitry Kolyshev Date: Fri May 19 16:13:08 2023 +0300 all: lint script commit 704534ce6278e7d9b1bef30a3acc4e59f25693bc Author: Dimitry Kolyshev Date: Fri May 19 16:12:04 2023 +0300 querylog: imp code commit 48510102a2fa5187f78067d2b9157dac62f8bb56 Author: Dimitry Kolyshev Date: Fri May 19 15:52:57 2023 +0300 querylog: imp code commit 89c273aea0e6758eb749a2d3bbaf1bc385a57797 Author: Dimitry Kolyshev Date: Fri May 19 15:40:50 2023 +0300 querylog: imp code commit 0057fe64553ad38de0fda10efb9d3512c9a00e45 Author: Dimitry Kolyshev Date: Fri May 19 13:54:46 2023 +0300 querylog: imp code ... 
and 1 more commit --- internal/querylog/decode.go | 184 +++++++++-------- internal/querylog/entry.go | 16 ++ internal/querylog/qlog.go | 94 +++++---- internal/querylog/qlog_test.go | 7 +- internal/querylog/qlogfile.go | 292 +++++++++++++++------------ internal/querylog/qlogfile_test.go | 24 +-- internal/querylog/qlogreader.go | 97 +++++---- internal/querylog/qlogreader_test.go | 16 +- internal/querylog/search.go | 138 ++++++++----- internal/querylog/searchparams.go | 25 ++- scripts/make/go-lint.sh | 4 +- 11 files changed, 515 insertions(+), 382 deletions(-) diff --git a/internal/querylog/decode.go b/internal/querylog/decode.go index 4e868b54..af9a7ca4 100644 --- a/internal/querylog/decode.go +++ b/internal/querylog/decode.go @@ -3,19 +3,24 @@ package querylog import ( "encoding/base64" "encoding/json" + "fmt" "io" "net" "strings" "time" "github.com/AdguardTeam/AdGuardHome/internal/filtering" + "github.com/AdguardTeam/golibs/errors" "github.com/AdguardTeam/golibs/log" "github.com/AdguardTeam/urlfilter/rules" "github.com/miekg/dns" ) +// logEntryHandler represents a handler for decoding json token to the logEntry +// struct. type logEntryHandler func(t json.Token, ent *logEntry) error +// logEntryHandlers is the map of log entry decode handlers for various keys. var logEntryHandlers = map[string]logEntryHandler{ "CID": func(t json.Token, ent *logEntry) error { v, ok := t.(string) @@ -166,6 +171,7 @@ var logEntryHandlers = map[string]logEntryHandler{ }, } +// decodeResultRuleKey decodes the token of "Rules" type to logEntry struct. func decodeResultRuleKey(key string, i int, dec *json.Decoder, ent *logEntry) { var vToken json.Token switch key { @@ -189,6 +195,8 @@ func decodeResultRuleKey(key string, i int, dec *json.Decoder, ent *logEntry) { } } +// decodeVTokenAndAddRule decodes the "Rules" toke as [filtering.ResultRule] +// and then adds the decoded object to the slice of result rules. 
func decodeVTokenAndAddRule( key string, i int, @@ -213,6 +221,8 @@ func decodeVTokenAndAddRule( return newRules, vToken } +// decodeResultRules parses the dec's tokens into logEntry ent interpreting it +// as a slice of the result rules. func decodeResultRules(dec *json.Decoder, ent *logEntry) { for { delimToken, err := dec.Token() @@ -224,48 +234,53 @@ func decodeResultRules(dec *json.Decoder, ent *logEntry) { return } - if d, ok := delimToken.(json.Delim); ok { - if d != '[' { - log.Debug("decodeResultRules: unexpected delim %q", d) + if d, ok := delimToken.(json.Delim); !ok { + return + } else if d != '[' { + log.Debug("decodeResultRules: unexpected delim %q", d) + } + + err = decodeResultRuleToken(dec, ent) + if err != nil { + if err != io.EOF && !errors.Is(err, ErrEndOfToken) { + log.Debug("decodeResultRules err: %s", err) } - } else { + return } + } +} - i := 0 - for { - var keyToken json.Token - keyToken, err = dec.Token() - if err != nil { - if err != io.EOF { - log.Debug("decodeResultRules err: %s", err) - } - - return - } - - if d, ok := keyToken.(json.Delim); ok { - switch d { - case '}': - i++ - case ']': - return - default: - // Go on. - } - - continue - } - - key, ok := keyToken.(string) - if !ok { - log.Debug("decodeResultRules: keyToken is %T (%[1]v) and not string", keyToken) - - return - } - - decodeResultRuleKey(key, i, dec, ent) +// decodeResultRuleToken decodes the tokens of "Rules" type to the logEntry ent. +func decodeResultRuleToken(dec *json.Decoder, ent *logEntry) (err error) { + i := 0 + for { + var keyToken json.Token + keyToken, err = dec.Token() + if err != nil { + // Don't wrap the error, because it's informative enough as is. + return err } + + if d, ok := keyToken.(json.Delim); ok { + switch d { + case '}': + i++ + case ']': + return ErrEndOfToken + default: + // Go on. 
+ } + + continue + } + + key, ok := keyToken.(string) + if !ok { + return fmt.Errorf("keyToken is %T (%[1]v) and not string", keyToken) + } + + decodeResultRuleKey(key, i, dec, ent) } } @@ -322,6 +337,8 @@ func decodeResultReverseHosts(dec *json.Decoder, ent *logEntry) { } } +// decodeResultIPList parses the dec's tokens into logEntry ent interpreting it +// as the result IP addresses list. func decodeResultIPList(dec *json.Decoder, ent *logEntry) { for { itemToken, err := dec.Token() @@ -355,6 +372,8 @@ func decodeResultIPList(dec *json.Decoder, ent *logEntry) { } } +// decodeResultDNSRewriteResultKey decodes the token of "DNSRewriteResult" type +// to the logEntry struct. func decodeResultDNSRewriteResultKey(key string, dec *json.Decoder, ent *logEntry) { var err error @@ -395,50 +414,29 @@ func decodeResultDNSRewriteResultKey(key string, dec *json.Decoder, ent *logEntr log.Debug("decodeResultDNSRewriteResultKey response err: %s", err) } - for rrType, rrValues := range ent.Result.DNSRewriteResult.Response { - switch rrType { - case - dns.TypeA, - dns.TypeAAAA: - for i, v := range rrValues { - s, _ := v.(string) - rrValues[i] = net.ParseIP(s) - } - default: - // Go on. - } - } + ent.parseDNSRewriteResultIPs() default: // Go on. } } +// decodeResultDNSRewriteResult parses the dec's tokens into logEntry ent +// interpreting it as the result DNSRewriteResult. 
func decodeResultDNSRewriteResult(dec *json.Decoder, ent *logEntry) { for { - keyToken, err := dec.Token() + key, err := parseKeyToken(dec) if err != nil { - if err != io.EOF { - log.Debug("decodeResultDNSRewriteResult err: %s", err) + if err != io.EOF && !errors.Is(err, ErrEndOfToken) { + log.Debug("decodeResultDNSRewriteResult: %s", err) } return } - if d, ok := keyToken.(json.Delim); ok { - if d == '}' { - return - } - + if key == "" { continue } - key, ok := keyToken.(string) - if !ok { - log.Debug("decodeResultDNSRewriteResult: keyToken is %T (%[1]v) and not string", keyToken) - - return - } - decodeResultDNSRewriteResultKey(key, dec, ent) } } @@ -474,34 +472,51 @@ func translateResult(ent *logEntry) { res.IPList = nil } +// ErrEndOfToken is an error returned by parse key token when the closing +// bracket is found. +const ErrEndOfToken errors.Error = "end of token" + +// parseKeyToken parses the dec's token key. +func parseKeyToken(dec *json.Decoder) (key string, err error) { + keyToken, err := dec.Token() + if err != nil { + return "", err + } + + if d, ok := keyToken.(json.Delim); ok { + if d == '}' { + return "", ErrEndOfToken + } + + return "", nil + } + + key, ok := keyToken.(string) + if !ok { + return "", fmt.Errorf("keyToken is %T (%[1]v) and not string", keyToken) + } + + return key, nil +} + +// decodeResult decodes a token of "Result" type to logEntry struct. 
func decodeResult(dec *json.Decoder, ent *logEntry) { defer translateResult(ent) for { - keyToken, err := dec.Token() + key, err := parseKeyToken(dec) if err != nil { - if err != io.EOF { - log.Debug("decodeResult err: %s", err) + if err != io.EOF && !errors.Is(err, ErrEndOfToken) { + log.Debug("decodeResult: %s", err) } return } - if d, ok := keyToken.(json.Delim); ok { - if d == '}' { - return - } - + if key == "" { continue } - key, ok := keyToken.(string) - if !ok { - log.Debug("decodeResult: keyToken is %T (%[1]v) and not string", keyToken) - - return - } - decHandler, ok := resultDecHandlers[key] if ok { decHandler(dec, ent) @@ -527,13 +542,16 @@ func decodeResult(dec *json.Decoder, ent *logEntry) { } } +// resultHandlers is the map of log entry decode handlers for various keys. var resultHandlers = map[string]logEntryHandler{ "IsFiltered": func(t json.Token, ent *logEntry) error { v, ok := t.(bool) if !ok { return nil } + ent.Result.IsFiltered = v + return nil }, "Rule": func(t json.Token, ent *logEntry) error { @@ -578,11 +596,14 @@ var resultHandlers = map[string]logEntryHandler{ if !ok { return nil } + i, err := v.Int64() if err != nil { return err } + ent.Result.Reason = filtering.Reason(i) + return nil }, "ServiceName": func(t json.Token, ent *logEntry) error { @@ -607,6 +628,7 @@ var resultHandlers = map[string]logEntryHandler{ }, } +// resultDecHandlers is the map of decode handlers for various keys. var resultDecHandlers = map[string]func(dec *json.Decoder, ent *logEntry){ "ReverseHosts": decodeResultReverseHosts, "IPList": decodeResultIPList, @@ -614,9 +636,11 @@ var resultDecHandlers = map[string]func(dec *json.Decoder, ent *logEntry){ "DNSRewriteResult": decodeResultDNSRewriteResult, } +// decodeLogEntry decodes string str to logEntry ent. 
func decodeLogEntry(ent *logEntry, str string) { dec := json.NewDecoder(strings.NewReader(str)) dec.UseNumber() + for { keyToken, err := dec.Token() if err != nil { diff --git a/internal/querylog/entry.go b/internal/querylog/entry.go index eae99385..c3c800ed 100644 --- a/internal/querylog/entry.go +++ b/internal/querylog/entry.go @@ -68,3 +68,19 @@ func (e *logEntry) addResponse(resp *dns.Msg, isOrig bool) { log.Error("querylog: %s", err) } } + +// parseDNSRewriteResultIPs fills logEntry's DNSRewriteResult response records +// with the IP addresses parsed from the raw strings. +func (e *logEntry) parseDNSRewriteResultIPs() { + for rrType, rrValues := range e.Result.DNSRewriteResult.Response { + switch rrType { + case dns.TypeA, dns.TypeAAAA: + for i, v := range rrValues { + s, _ := v.(string) + rrValues[i] = net.ParseIP(s) + } + default: + // Go on. + } + } +} diff --git a/internal/querylog/qlog.go b/internal/querylog/qlog.go index 0a283299..4d543587 100644 --- a/internal/querylog/qlog.go +++ b/internal/querylog/qlog.go @@ -16,32 +16,35 @@ import ( "github.com/miekg/dns" ) -const ( - queryLogFileName = "querylog.json" // .gz added during compression -) +// queryLogFileName is a name of the log file. ".gz" extension is added later +// during compression. +const queryLogFileName = "querylog.json" -// queryLog is a structure that writes and reads the DNS query log +// queryLog is a structure that writes and reads the DNS query log. type queryLog struct { - findClient func(ids []string) (c *Client, err error) - // confMu protects conf. confMu *sync.RWMutex - conf *Config + + conf *Config + anonymizer *aghnet.IPMut + + findClient func(ids []string) (c *Client, err error) // logFile is the path to the log file. logFile string - // bufferLock protects buffer. - bufferLock sync.RWMutex // buffer contains recent log entries. The entries in this buffer must not // be modified. 
buffer []*logEntry - fileFlushLock sync.Mutex // synchronize a file-flushing goroutine and main thread - flushPending bool // don't start another goroutine while the previous one is still running + // bufferLock protects buffer. + bufferLock sync.RWMutex + + // fileFlushLock synchronizes a file-flushing goroutine and main thread. + fileFlushLock sync.Mutex fileWriteLock sync.Mutex - anonymizer *aghnet.IPMut + flushPending bool } // ClientProto values are names of the client protocols. @@ -155,6 +158,43 @@ func (l *queryLog) clear() { log.Debug("querylog: cleared") } +// newLogEntry creates an instance of logEntry from parameters. +func newLogEntry(params *AddParams) (entry *logEntry) { + q := params.Question.Question[0] + + entry = &logEntry{ + // TODO(d.kolyshev): Export this timestamp to func params. + Time: time.Now(), + + QHost: strings.ToLower(q.Name[:len(q.Name)-1]), + QType: dns.Type(q.Qtype).String(), + QClass: dns.Class(q.Qclass).String(), + + ClientID: params.ClientID, + ClientProto: params.ClientProto, + + Result: *params.Result, + Upstream: params.Upstream, + + IP: params.ClientIP, + + Elapsed: params.Elapsed, + + Cached: params.Cached, + AuthenticatedData: params.AuthenticatedData, + } + + if params.ReqECS != nil { + entry.ReqECS = params.ReqECS.String() + } + + entry.addResponse(params.Answer, false) + entry.addResponse(params.OrigAnswer, true) + + return entry +} + +// Add implements the [QueryLog] interface for *queryLog. 
func (l *queryLog) Add(params *AddParams) { var isEnabled, fileIsEnabled bool var memSize uint32 @@ -181,35 +221,7 @@ func (l *queryLog) Add(params *AddParams) { params.Result = &filtering.Result{} } - now := time.Now() - q := params.Question.Question[0] - entry := &logEntry{ - Time: now, - - QHost: strings.ToLower(q.Name[:len(q.Name)-1]), - QType: dns.Type(q.Qtype).String(), - QClass: dns.Class(q.Qclass).String(), - - ClientID: params.ClientID, - ClientProto: params.ClientProto, - - Result: *params.Result, - Upstream: params.Upstream, - - IP: params.ClientIP, - - Elapsed: params.Elapsed, - - Cached: params.Cached, - AuthenticatedData: params.AuthenticatedData, - } - - if params.ReqECS != nil { - entry.ReqECS = params.ReqECS.String() - } - - entry.addResponse(params.Answer, false) - entry.addResponse(params.OrigAnswer, true) + entry := newLogEntry(params) needFlush := false func() { diff --git a/internal/querylog/qlog_test.go b/internal/querylog/qlog_test.go index 48fc9381..58fcd704 100644 --- a/internal/querylog/qlog_test.go +++ b/internal/querylog/qlog_test.go @@ -45,9 +45,10 @@ func TestQueryLog(t *testing.T) { addEntry(l, "example.com", net.IPv4(1, 1, 1, 4), net.IPv4(2, 2, 2, 4)) type tcAssertion struct { - num int - host string - answer, client net.IP + host string + answer net.IP + client net.IP + num int } testCases := []struct { diff --git a/internal/querylog/qlogfile.go b/internal/querylog/qlogfile.go index fd6c5226..397840c9 100644 --- a/internal/querylog/qlogfile.go +++ b/internal/querylog/qlogfile.go @@ -12,141 +12,181 @@ import ( "github.com/AdguardTeam/golibs/log" ) -// Timestamp not found errors. const ( - ErrTSNotFound errors.Error = "ts not found" - ErrTSTooLate errors.Error = "ts too late" - ErrTSTooEarly errors.Error = "ts too early" + // Timestamp not found errors. 
+ errTSNotFound errors.Error = "ts not found" + errTSTooLate errors.Error = "ts too late" + errTSTooEarly errors.Error = "ts too early" + + // maxEntrySize is a maximum size of the entry. + // + // TODO: Find a way to grow buffer instead of relying on this value when + // reading strings. + maxEntrySize = 16 * 1024 + + // bufferSize should be enough for at least this number of entries. + bufferSize = 100 * maxEntrySize ) -// TODO: Find a way to grow buffer instead of relying on this value when reading strings -const maxEntrySize = 16 * 1024 - -// buffer should be enough for at least this number of entries -const bufferSize = 100 * maxEntrySize - -// QLogFile represents a single query log file -// It allows reading from the file in the reverse order +// qLogFile represents a single query log file. It allows reading from the +// file in the reverse order. // -// Please note that this is a stateful object. -// Internally, it contains a pointer to a specific position in the file, -// and it reads lines in reverse order starting from that position. -type QLogFile struct { - file *os.File // the query log file - position int64 // current position in the file +// Please note, that this is a stateful object. Internally, it contains a +// pointer to a specific position in the file, and it reads lines in reverse +// order starting from that position. +type qLogFile struct { + // file is the query log file. + file *os.File - buffer []byte // buffer that we've read from the file - bufferStart int64 // start of the buffer (in the file) - bufferLen int // buffer len + // buffer that we've read from the file. + buffer []byte - lock sync.Mutex // We use mutex to make it thread-safe + // lock is a mutex to make it thread-safe. + lock sync.Mutex + + // position is the position in the file. + position int64 + + // bufferStart is the start of the buffer (in the file). + bufferStart int64 + + // bufferLen is the length of the buffer. 
+ bufferLen int } -// NewQLogFile initializes a new instance of the QLogFile -func NewQLogFile(path string) (*QLogFile, error) { +// newQLogFile initializes a new instance of the qLogFile. +func newQLogFile(path string) (qf *qLogFile, err error) { f, err := os.OpenFile(path, os.O_RDONLY, 0o644) if err != nil { return nil, err } - return &QLogFile{ - file: f, - }, nil + return &qLogFile{file: f}, nil +} + +// validateQLogLineIdx returns error if the line index is not valid to continue +// search. +func (q *qLogFile) validateQLogLineIdx(lineIdx, lastProbeLineIdx, ts, fSize int64) (err error) { + if lineIdx == lastProbeLineIdx { + if lineIdx == 0 { + return errTSTooEarly + } + + // If we're testing the same line twice then most likely the scope is + // too narrow and we won't find anything anymore in any other file. + return fmt.Errorf("looking up timestamp %d in %q: %w", ts, q.file.Name(), errTSNotFound) + } else if lineIdx == fSize { + return errTSTooLate + } + + return nil } // seekTS performs binary search in the query log file looking for a record -// with the specified timestamp. Once the record is found, it sets -// "position" so that the next ReadNext call returned that record. +// with the specified timestamp. Once the record is found, it sets "position" +// so that the next ReadNext call returned that record. // // The algorithm is rather simple: -// 1. It starts with the position in the middle of a file -// 2. Shifts back to the beginning of the line -// 3. Checks the log record timestamp -// 4. If it is lower than the timestamp we are looking for, -// it shifts seek position to 3/4 of the file. Otherwise, to 1/4 of the file. -// 5. It performs the search again, every time the search scope is narrowed twice. +// 1. It starts with the position in the middle of a file. +// 2. Shifts back to the beginning of the line. +// 3. Checks the log record timestamp. +// 4. 
If it is lower than the timestamp we are looking for, it shifts seek +// position to 3/4 of the file. Otherwise, to 1/4 of the file. +// 5. It performs the search again, every time the search scope is narrowed +// twice. // // Returns: -// * It returns the position of the the line with the timestamp we were looking for -// so that when we call "ReadNext" this line was returned. -// * Depth of the search (how many times we compared timestamps). -// * If we could not find it, it returns one of the errors described above. -func (q *QLogFile) seekTS(timestamp int64) (int64, int, error) { +// - It returns the position of the line with the timestamp we were looking +// for so that when we call "ReadNext" this line was returned. +// - Depth of the search (how many times we compared timestamps). +// - If we could not find it, it returns one of the errors described above. +func (q *qLogFile) seekTS(timestamp int64) (pos int64, depth int, err error) { q.lock.Lock() defer q.lock.Unlock() - // Empty the buffer + // Empty the buffer. q.buffer = nil - // First of all, check the file size + // First of all, check the file size. fileInfo, err := q.file.Stat() if err != nil { return 0, 0, err } - // Define the search scope - start := int64(0) // start of the search interval (position in the file) - end := fileInfo.Size() // end of the search interval (position in the file) - probe := (end - start) / 2 // probe -- approximate index of the line we'll try to check + // Define the search scope. + + // Start of the search interval (position in the file). + start := int64(0) + // End of the search interval (position in the file). + end := fileInfo.Size() + // Probe is the approximate index of the line we'll try to check. + probe := (end - start) / 2 + var line string - var lineIdx int64 // index of the probe line in the file + // Index of the probe line in the file. 
+ var lineIdx int64 var lineEndIdx int64 - var lastProbeLineIdx int64 // index of the last probe line + // Index of the last probe line. + var lastProbeLineIdx int64 lastProbeLineIdx = -1 - // Count seek depth in order to detect mistakes - // If depth is too large, we should stop the search - depth := 0 - + // Count seek depth in order to detect mistakes. If depth is too large, + // we should stop the search. for { - // Get the line at the specified position + // Get the line at the specified position. line, lineIdx, lineEndIdx, err = q.readProbeLine(probe) if err != nil { return 0, depth, err } - if lineIdx == lastProbeLineIdx { - if lineIdx == 0 { - return 0, depth, ErrTSTooEarly - } - - // If we're testing the same line twice then most likely - // the scope is too narrow and we won't find anything - // anymore in any other file. - return 0, depth, fmt.Errorf("looking up timestamp %d in %q: %w", timestamp, q.file.Name(), ErrTSNotFound) - } else if lineIdx == fileInfo.Size() { - return 0, depth, ErrTSTooLate + // Check if the line index if invalid. + err = q.validateQLogLineIdx(lineIdx, lastProbeLineIdx, timestamp, fileInfo.Size()) + if err != nil { + return 0, depth, err } - // Save the last found idx + // Save the last found idx. lastProbeLineIdx = lineIdx - // Get the timestamp from the query log record + // Get the timestamp from the query log record. ts := readQLogTimestamp(line) if ts == 0 { - return 0, depth, fmt.Errorf("looking up timestamp %d in %q: record %q has empty timestamp", timestamp, q.file.Name(), line) + return 0, depth, fmt.Errorf( + "looking up timestamp %d in %q: record %q has empty timestamp", + timestamp, + q.file.Name(), + line, + ) } if ts == timestamp { - // Hurray, returning the result + // Hurray, returning the result. break } - // Narrow the scope and repeat the search + // Narrow the scope and repeat the search. 
if ts > timestamp { - // If the timestamp we're looking for is OLDER than what we found - // Then the line is somewhere on the LEFT side from the current probe position + // If the timestamp we're looking for is OLDER than what we found, + // then the line is somewhere on the LEFT side from the current + // probe position. end = lineIdx } else { - // If the timestamp we're looking for is NEWER than what we found - // Then the line is somewhere on the RIGHT side from the current probe position + // If the timestamp we're looking for is NEWER than what we found, + // then the line is somewhere on the RIGHT side from the current + // probe position. start = lineEndIdx } probe = start + (end-start)/2 depth++ if depth >= 100 { - return 0, depth, fmt.Errorf("looking up timestamp %d in %q: depth %d too high: %w", timestamp, q.file.Name(), depth, ErrTSNotFound) + return 0, depth, fmt.Errorf( + "looking up timestamp %d in %q: depth %d too high: %w", + timestamp, + q.file.Name(), + depth, + errTSNotFound, + ) } } @@ -154,37 +194,39 @@ func (q *QLogFile) seekTS(timestamp int64) (int64, int, error) { return q.position, depth, nil } -// SeekStart changes the current position to the end of the file -// Please note that we're reading query log in the reverse order -// and that's why log start is actually the end of file +// SeekStart changes the current position to the end of the file. Please note, +// that we're reading query log in the reverse order and that's why log start +// is actually the end of file. // -// Returns nil if we were able to change the current position. -// Returns error in any other case. -func (q *QLogFile) SeekStart() (int64, error) { +// Returns nil if we were able to change the current position. Returns error +// in any other case. +func (q *qLogFile) SeekStart() (int64, error) { q.lock.Lock() defer q.lock.Unlock() - // Empty the buffer + // Empty the buffer. q.buffer = nil - // First of all, check the file size + // First of all, check the file size. 
fileInfo, err := q.file.Stat() if err != nil { return 0, err } - // Place the position to the very end of file + // Place the position to the very end of file. q.position = fileInfo.Size() - 1 if q.position < 0 { q.position = 0 } + return q.position, nil } -// ReadNext reads the next line (in the reverse order) from the file -// and shifts the current position left to the next (actually prev) line. -// returns io.EOF if there's nothing to read more -func (q *QLogFile) ReadNext() (string, error) { +// ReadNext reads the next line (in the reverse order) from the file and shifts +// the current position left to the next (actually prev) line. +// +// Returns io.EOF if there's nothing more to read. +func (q *qLogFile) ReadNext() (string, error) { q.lock.Lock() defer q.lock.Unlock() @@ -197,35 +239,34 @@ func (q *QLogFile) ReadNext() (string, error) { return "", err } - // Shift position + // Shift position. if lineIdx == 0 { q.position = 0 } else { - // there's usually a line break before the line - // so we should shift one more char left from the line - // line\nline + // There's usually a line break before the line, so we should shift one + // more char left from the line "\nline". q.position = lineIdx - 1 } return line, err } -// Close frees the underlying resources -func (q *QLogFile) Close() error { +// Close frees the underlying resources. +func (q *qLogFile) Close() error { return q.file.Close() } -// readNextLine reads the next line from the specified position -// this line actually have to END on that position. +// readNextLine reads the next line from the specified position. This line +// actually have to END on that position. // -// the algorithm is: -// 1. check if we have the buffer initialized -// 2. if it is, scan it and look for the line there -// 3. if we cannot find the line there, read the prev chunk into the buffer -// 4. 
read the line from the buffer -func (q *QLogFile) readNextLine(position int64) (string, int64, error) { +// The algorithm is: +// 1. Check if we have the buffer initialized. +// 2. If it is so, scan it and look for the line there. +// 3. If we cannot find the line there, read the prev chunk into the buffer. +// 4. Read the line from the buffer. +func (q *qLogFile) readNextLine(position int64) (string, int64, error) { relativePos := position - q.bufferStart if q.buffer == nil || (relativePos < maxEntrySize && q.bufferStart != 0) { - // Time to re-init the buffer + // Time to re-init the buffer. err := q.initBuffer(position) if err != nil { return "", 0, err @@ -233,8 +274,7 @@ func (q *QLogFile) readNextLine(position int64) (string, int64, error) { relativePos = position - q.bufferStart } - // Look for the end of the prev line - // This is where we'll read from + // Look for the end of the prev line, this is where we'll read from. startLine := int64(0) for i := relativePos - 1; i >= 0; i-- { if q.buffer[i] == '\n' { @@ -245,18 +285,19 @@ func (q *QLogFile) readNextLine(position int64) (string, int64, error) { line := string(q.buffer[startLine:relativePos]) lineIdx := q.bufferStart + startLine + return line, lineIdx, nil } -// initBuffer initializes the QLogFile buffer. -// the goal is to read a chunk of file that includes the line with the specified position. -func (q *QLogFile) initBuffer(position int64) error { +// initBuffer initializes the qLogFile buffer. The goal is to read a chunk of +// file that includes the line with the specified position. +func (q *qLogFile) initBuffer(position int64) error { q.bufferStart = int64(0) if position > bufferSize { q.bufferStart = position - bufferSize } - // Seek to this position + // Seek to this position. 
_, err := q.file.Seek(q.bufferStart, io.SeekStart) if err != nil { return err @@ -271,34 +312,35 @@ func (q *QLogFile) initBuffer(position int64) error { return err } -// readProbeLine reads a line that includes the specified position -// this method is supposed to be used when we use binary search in the Seek method -// in the case of consecutive reads, use readNext (it uses a better buffer) -func (q *QLogFile) readProbeLine(position int64) (string, int64, int64, error) { - // First of all, we should read a buffer that will include the query log line - // In order to do this, we'll define the boundaries +// readProbeLine reads a line that includes the specified position. This +// method is supposed to be used when we use binary search in the Seek method. +// In the case of consecutive reads, use readNext, cause it uses better buffer. +func (q *qLogFile) readProbeLine(position int64) (string, int64, int64, error) { + // First of all, we should read a buffer that will include the query log + // line. In order to do this, we'll define the boundaries. seekPosition := int64(0) - relativePos := position // position relative to the buffer we're going to read + // Position relative to the buffer we're going to read. + relativePos := position if position > maxEntrySize { seekPosition = position - maxEntrySize relativePos = maxEntrySize } - // Seek to this position + // Seek to this position. _, err := q.file.Seek(seekPosition, io.SeekStart) if err != nil { return "", 0, 0, err } - // The buffer size is 2*maxEntrySize + // The buffer size is 2*maxEntrySize. buffer := make([]byte, maxEntrySize*2) bufferLen, err := q.file.Read(buffer) if err != nil { return "", 0, 0, err } - // Now start looking for the new line character starting - // from the relativePos and going left + // Now start looking for the new line character starting from the + // relativePos and going left. 
startLine := int64(0) for i := relativePos - 1; i >= 0; i-- { if buffer[i] == '\n' { @@ -306,7 +348,7 @@ func (q *QLogFile) readProbeLine(position int64) (string, int64, int64, error) { break } } - // Looking for the end of line now + // Looking for the end of line now. endLine := int64(bufferLen) lineEndIdx := endLine + seekPosition for i := relativePos; i < int64(bufferLen); i++ { @@ -317,13 +359,13 @@ func (q *QLogFile) readProbeLine(position int64) (string, int64, int64, error) { } } - // Finally we can return the string we were looking for + // Finally we can return the string we were looking for. lineIdx := startLine + seekPosition return string(buffer[startLine:endLine]), lineIdx, lineEndIdx, nil } -// readJSONvalue reads a JSON string in form of '"key":"value"'. prefix must be -// of the form '"key":"' to generate less garbage. +// readJSONValue reads a JSON string in form of '"key":"value"'. prefix must +// be of the form '"key":"' to generate less garbage. func readJSONValue(s, prefix string) string { i := strings.Index(s, prefix) if i == -1 { @@ -340,7 +382,7 @@ func readJSONValue(s, prefix string) string { return s[start:end] } -// readQLogTimestamp reads the timestamp field from the query log line +// readQLogTimestamp reads the timestamp field from the query log line. 
func readQLogTimestamp(str string) int64 { val := readJSONValue(str, `"T":"`) if len(val) == 0 { @@ -351,10 +393,12 @@ func readQLogTimestamp(str string) int64 { log.Error("Couldn't find timestamp: %s", str) return 0 } + tm, err := time.Parse(time.RFC3339Nano, val) if err != nil { log.Error("Couldn't parse timestamp: %s", val) return 0 } + return tm.UnixNano() } diff --git a/internal/querylog/qlogfile_test.go b/internal/querylog/qlogfile_test.go index 3e32420f..f91d3911 100644 --- a/internal/querylog/qlogfile_test.go +++ b/internal/querylog/qlogfile_test.go @@ -72,15 +72,15 @@ func prepareTestFiles(t *testing.T, filesNum, linesNum int) []string { return files } -// newTestQLogFile creates new *QLogFile for tests and registers the required +// newTestQLogFile creates new *qLogFile for tests and registers the required // cleanup functions. -func newTestQLogFile(t *testing.T, linesNum int) (file *QLogFile) { +func newTestQLogFile(t *testing.T, linesNum int) (file *qLogFile) { t.Helper() testFile := prepareTestFiles(t, 1, linesNum)[0] - // Create the new QLogFile instance. - file, err := NewQLogFile(testFile) + // Create the new qLogFile instance. + file, err := newQLogFile(testFile) require.NoError(t, err) assert.NotNil(t, file) @@ -240,7 +240,7 @@ func TestQLogFile_SeekTS_bad(t *testing.T) { } } -func getQLogFileLine(q *QLogFile, lineNumber int) (line string, err error) { +func getQLogFileLine(q *qLogFile, lineNumber int) (line string, err error) { if _, err = q.SeekStart(); err != nil { return line, err } @@ -256,7 +256,7 @@ func getQLogFileLine(q *QLogFile, lineNumber int) (line string, err error) { // Check adding and loading (with filtering) entries from disk and memory. func TestQLogFile(t *testing.T) { - // Create the new QLogFile instance. + // Create the new qLogFile instance. q := newTestQLogFile(t, 2) // Seek to the start. 
@@ -285,7 +285,7 @@ func TestQLogFile(t *testing.T) { assert.Empty(t, line) } -func NewTestQLogFileData(t *testing.T, data string) (file *QLogFile) { +func newTestQLogFileData(t *testing.T, data string) (file *qLogFile) { f, err := os.CreateTemp(t.TempDir(), "*.txt") require.NoError(t, err) testutil.CleanupAndRequireSuccess(t, f.Close) @@ -293,7 +293,7 @@ func NewTestQLogFileData(t *testing.T, data string) (file *QLogFile) { _, err = f.WriteString(data) require.NoError(t, err) - file, err = NewQLogFile(f.Name()) + file, err = newQLogFile(f.Name()) require.NoError(t, err) testutil.CleanupAndRequireSuccess(t, file.Close) @@ -309,9 +309,9 @@ func TestQLog_Seek(t *testing.T) { timestamp, _ := time.Parse(time.RFC3339Nano, "2020-08-31T18:44:25.376690873+03:00") testCases := []struct { + wantErr error name string delta int - wantErr error wantDepth int }{{ name: "ok", @@ -321,12 +321,12 @@ func TestQLog_Seek(t *testing.T) { }, { name: "too_late", delta: 2, - wantErr: ErrTSTooLate, + wantErr: errTSTooLate, wantDepth: 2, }, { name: "too_early", delta: -2, - wantErr: ErrTSTooEarly, + wantErr: errTSTooEarly, wantDepth: 1, }} @@ -338,7 +338,7 @@ func TestQLog_Seek(t *testing.T) { timestamp.Add(time.Second).Format(time.RFC3339Nano), ) - q := NewTestQLogFileData(t, data) + q := newTestQLogFileData(t, data) _, depth, err := q.seekTS(timestamp.Add(time.Second * time.Duration(tc.delta)).UnixNano()) require.Truef(t, errors.Is(err, tc.wantErr), "%v", err) diff --git a/internal/querylog/qlogreader.go b/internal/querylog/qlogreader.go index 3454a441..610de02f 100644 --- a/internal/querylog/qlogreader.go +++ b/internal/querylog/qlogreader.go @@ -9,36 +9,36 @@ import ( "github.com/AdguardTeam/golibs/log" ) -// QLogReader allows reading from multiple query log files in the reverse order. +// qLogReader allows reading from multiple query log files in the reverse +// order. // -// Please note that this is a stateful object. 
-// Internally, it contains a pointer to a particular query log file, and -// to a specific position in this file, and it reads lines in reverse order -// starting from that position. -type QLogReader struct { - // qFiles - array with the query log files - // The order is - from oldest to newest - qFiles []*QLogFile +// Please note that this is a stateful object. Internally, it contains a +// pointer to a particular query log file, and to a specific position in this +// file, and it reads lines in reverse order starting from that position. +type qLogReader struct { + // qFiles is an array with the query log files. The order is from oldest + // to newest. + qFiles []*qLogFile - currentFile int // Index of the current file + // currentFile is the index of the current file. + currentFile int } -// NewQLogReader initializes a QLogReader instance -// with the specified files -func NewQLogReader(files []string) (*QLogReader, error) { - qFiles := make([]*QLogFile, 0) +// newQLogReader initializes a qLogReader instance with the specified files. +func newQLogReader(files []string) (*qLogReader, error) { + qFiles := make([]*qLogFile, 0) for _, f := range files { - q, err := NewQLogFile(f) + q, err := newQLogFile(f) if err != nil { if errors.Is(err, os.ErrNotExist) { continue } // Close what we've already opened. - cerr := closeQFiles(qFiles) - if cerr != nil { - log.Debug("querylog: closing files: %s", cerr) + cErr := closeQFiles(qFiles) + if cErr != nil { + log.Debug("querylog: closing files: %s", cErr) } return nil, err @@ -47,31 +47,28 @@ func NewQLogReader(files []string) (*QLogReader, error) { qFiles = append(qFiles, q) } - return &QLogReader{ - qFiles: qFiles, - currentFile: (len(qFiles) - 1), - }, nil + return &qLogReader{qFiles: qFiles, currentFile: len(qFiles) - 1}, nil } // seekTS performs binary search of a query log record with the specified -// timestamp. 
If the record is found, it sets QLogReader's position to point to -that line, so that the next ReadNext call returned this line. -func (r *QLogReader) seekTS(timestamp int64) (err error) { +// timestamp. If the record is found, it sets qLogReader's position to point +// to that line, so that the next ReadNext call returns this line. +func (r *qLogReader) seekTS(timestamp int64) (err error) { for i := len(r.qFiles) - 1; i >= 0; i-- { q := r.qFiles[i] _, _, err = q.seekTS(timestamp) if err != nil { - if errors.Is(err, ErrTSTooEarly) { + if errors.Is(err, errTSTooEarly) { // Look at the next file, since we've reached the end of this // one. If there is no next file, it's not found. - err = ErrTSNotFound + err = errTSNotFound continue - } else if errors.Is(err, ErrTSTooLate) { + } else if errors.Is(err, errTSTooLate) { // Just seek to the start then. timestamp is probably between // the end of the previous one and the start of this one. return r.SeekStart() - } else if errors.Is(err, ErrTSNotFound) { + } else if errors.Is(err, errTSNotFound) { return err } else { return fmt.Errorf("seekts: file at index %d: %w", i, err) @@ -80,7 +77,7 @@ func (r *QLogReader) seekTS(timestamp int64) (err error) { // The search is finished, and the searched element has been found. // Update currentFile only, position is already set properly in - // QLogFile. + // qLogFile. r.currentFile = i return nil @@ -93,13 +90,13 @@ func (r *QLogReader) seekTS(timestamp int64) (err error) { return nil } -// SeekStart changes the current position to the end of the newest file -// Please note that we're reading query log in the reverse order -// and that's why log start is actually the end of file +// SeekStart changes the current position to the end of the newest file. +// Please note that we're reading query log in the reverse order and that's why +// the log actually starts at the end of the file. // -// Returns nil if we were able to change the current position. 
-// Returns error in any other case. -func (r *QLogReader) SeekStart() error { +// Returns nil if we were able to change the current position. Returns error +// in any other case. +func (r *qLogReader) SeekStart() error { if len(r.qFiles) == 0 { return nil } @@ -110,10 +107,12 @@ func (r *QLogReader) SeekStart() error { return err } -// ReadNext reads the next line (in the reverse order) from the query log files. -// and shifts the current position left to the next (actually prev) line (or the next file). -// returns io.EOF if there's nothing to read more. -func (r *QLogReader) ReadNext() (string, error) { +// ReadNext reads the next line (in the reverse order) from the query log +// files. Then shifts the current position left to the next (actually prev) +// line (or the next file). +// +// Returns io.EOF if there is nothing more to read. +func (r *qLogReader) ReadNext() (string, error) { if len(r.qFiles) == 0 { return "", io.EOF } @@ -122,7 +121,7 @@ func (r *QLogReader) ReadNext() (string, error) { q := r.qFiles[r.currentFile] line, err := q.ReadNext() if err != nil { - // Shift to the older file + // Shift to the older file. r.currentFile-- if r.currentFile < 0 { break @@ -130,10 +129,10 @@ func (r *QLogReader) ReadNext() (string, error) { q = r.qFiles[r.currentFile] - // Set it's position to the start right away + // Set its position to the start right away. _, err = q.SeekStart() - // This is unexpected, return an error right away + // This is unexpected, return an error right away. if err != nil { return "", err } @@ -142,17 +141,17 @@ func (r *QLogReader) ReadNext() (string, error) { } } - // Nothing to read anymore + // Nothing to read anymore. return "", io.EOF } -// Close closes the QLogReader -func (r *QLogReader) Close() error { +// Close closes the qLogReader. 
+func (r *qLogReader) Close() error { return closeQFiles(r.qFiles) } -// closeQFiles - helper method to close multiple QLogFile instances -func closeQFiles(qFiles []*QLogFile) error { +// closeQFiles is a helper method to close multiple qLogFile instances. +func closeQFiles(qFiles []*qLogFile) error { var errs []error for _, q := range qFiles { @@ -163,7 +162,7 @@ func closeQFiles(qFiles []*QLogFile) error { } if len(errs) > 0 { - return errors.List("error while closing QLogReader", errs...) + return errors.List("error while closing qLogReader", errs...) } return nil diff --git a/internal/querylog/qlogreader_test.go b/internal/querylog/qlogreader_test.go index ffdc285b..43bb3d5c 100644 --- a/internal/querylog/qlogreader_test.go +++ b/internal/querylog/qlogreader_test.go @@ -10,15 +10,15 @@ import ( "github.com/stretchr/testify/require" ) -// newTestQLogReader creates new *QLogReader for tests and registers the +// newTestQLogReader creates new *qLogReader for tests and registers the // required cleanup functions. -func newTestQLogReader(t *testing.T, filesNum, linesNum int) (reader *QLogReader) { +func newTestQLogReader(t *testing.T, filesNum, linesNum int) (reader *qLogReader) { t.Helper() testFiles := prepareTestFiles(t, filesNum, linesNum) - // Create the new QLogReader instance. - reader, err := NewQLogReader(testFiles) + // Create the new qLogReader instance. 
+ reader, err := newQLogReader(testFiles) require.NoError(t, err) assert.NotNil(t, reader) @@ -75,9 +75,9 @@ func TestQLogReader_Seek(t *testing.T) { r := newTestQLogReader(t, 2, 10000) testCases := []struct { + want error name string time string - want error }{{ name: "not_too_old", time: "2020-02-18T22:39:35.920973+03:00", @@ -97,7 +97,7 @@ func TestQLogReader_Seek(t *testing.T) { }, { name: "non-existent_long_ago", time: "2000-02-19T01:23:16.920973+03:00", - want: ErrTSNotFound, + want: errTSNotFound, }, { name: "non-existent_far_ahead", time: "2100-02-19T01:23:16.920973+03:00", @@ -105,7 +105,7 @@ func TestQLogReader_Seek(t *testing.T) { }, { name: "non-existent_but_could", time: "2020-02-18T22:36:37.000000+03:00", - want: ErrTSNotFound, + want: errTSNotFound, }} for _, tc := range testCases { @@ -125,9 +125,9 @@ func TestQLogReader_ReadNext(t *testing.T) { r := newTestQLogReader(t, filesNum, linesNum) testCases := []struct { + want error name string start int - want error }{{ name: "ok", start: 0, diff --git a/internal/querylog/search.go b/internal/querylog/search.go index db2d3474..9102c49f 100644 --- a/internal/querylog/search.go +++ b/internal/querylog/search.go @@ -1,9 +1,11 @@ package querylog import ( + "fmt" "io" "time" + "github.com/AdguardTeam/golibs/errors" "github.com/AdguardTeam/golibs/log" "golang.org/x/exp/slices" ) @@ -134,84 +136,112 @@ func (l *queryLog) search(params *searchParams) (entries []*logEntry, oldest tim return entries, oldest } -// searchFiles looks up log records from all log files. It optionally uses the -// client cache, if provided. searchFiles does not scan more than -// maxFileScanEntries so callers may need to call it several times to get all -// results. oldest and total are the time of the oldest processed entry and the -// total number of processed entries, including discarded ones, correspondingly. 
-func (l *queryLog) searchFiles( - params *searchParams, - cache clientCache, -) (entries []*logEntry, oldest time.Time, total int) { +// seekRecord changes the current position to the next record older than the +// provided parameter. +func (r *qLogReader) seekRecord(olderThan time.Time) (err error) { + if olderThan.IsZero() { + return r.SeekStart() + } + + err = r.seekTS(olderThan.UnixNano()) + if err == nil { + // Read to the next record, because we only need the one that goes + // after it. + _, err = r.ReadNext() + } + + return err +} + +// setQLogReader creates a reader with the specified files and sets the +// position to the next record older than the provided parameter. +func (l *queryLog) setQLogReader(olderThan time.Time) (qr *qLogReader, err error) { files := []string{ l.logFile + ".1", l.logFile, } - r, err := NewQLogReader(files) + r, err := newQLogReader(files) if err != nil { - log.Error("querylog: opening qlog reader: %s", err) - - return entries, oldest, 0 - } - - defer func() { - closeErr := r.Close() - if closeErr != nil { - log.Error("querylog: closing file: %s", err) - } - }() - - if params.olderThan.IsZero() { - err = r.SeekStart() - } else { - err = r.seekTS(params.olderThan.UnixNano()) - if err == nil { - // Read to the next record, because we only need the one that goes - // after it. - _, err = r.ReadNext() - } + return nil, fmt.Errorf("opening qlog reader: %s", err) } + err = r.seekRecord(olderThan) if err != nil { - log.Debug("querylog: cannot seek to %s: %s", params.olderThan, err) + defer func() { err = errors.WithDeferred(err, r.Close()) }() + log.Debug("querylog: cannot seek to %s: %s", olderThan, err) - return entries, oldest, 0 + return nil, nil } - totalLimit := params.offset + params.limit - oldestNano := int64(0) + return r, nil +} - // By default, we do not scan more than maxFileScanEntries at once. The - // idea is to make search calls faster so that the UI could handle it and - // show something quicker. 
This behavior can be overridden if - // maxFileScanEntries is set to 0. +// readEntries reads entries from the reader to totalLimit. By default, we do +// not scan more than maxFileScanEntries at once. The idea is to make search +// calls faster so that the UI could handle it and show something quicker. +// This behavior can be overridden if maxFileScanEntries is set to 0. +func (l *queryLog) readEntries( + r *qLogReader, + params *searchParams, + cache clientCache, + totalLimit int, +) (entries []*logEntry, oldestNano int64, total int) { for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 { - var e *logEntry - var ts int64 - - e, ts, err = l.readNextEntry(r, params, cache) - if err != nil { - if err == io.EOF { + ent, ts, rErr := l.readNextEntry(r, params, cache) + if rErr != nil { + if rErr == io.EOF { oldestNano = 0 break } - log.Error("querylog: reading next entry: %s", err) + log.Error("querylog: reading next entry: %s", rErr) } oldestNano = ts total++ - if e != nil { - entries = append(entries, e) - if len(entries) == totalLimit { - break - } + if ent == nil { + continue + } + + entries = append(entries, ent) + if len(entries) == totalLimit { + break } } + return entries, oldestNano, total +} + +// searchFiles looks up log records from all log files. It optionally uses the +// client cache, if provided. searchFiles does not scan more than +// maxFileScanEntries so callers may need to call it several times to get all +// the results. oldest and total are the time of the oldest processed entry +// and the total number of processed entries, including discarded ones, +// correspondingly. 
+func (l *queryLog) searchFiles( + params *searchParams, + cache clientCache, +) (entries []*logEntry, oldest time.Time, total int) { + r, err := l.setQLogReader(params.olderThan) + if err != nil { + log.Error("querylog: %s", err) + } + + if r == nil { + return entries, oldest, 0 + } + + defer func() { + if closeErr := r.Close(); closeErr != nil { + log.Error("querylog: closing file: %s", closeErr) + } + }() + + totalLimit := params.offset + params.limit + entries, oldestNano, total := l.readEntries(r, params, cache, totalLimit) if oldestNano != 0 { oldest = time.Unix(0, oldestNano) } @@ -243,11 +273,11 @@ func (f quickMatchClientFinder) findClient(clientID, ip string) (c *Client) { } // readNextEntry reads the next log entry and checks if it matches the search -// criteria. It optionally uses the client cache, if provided. e is nil if the -// entry doesn't match the search criteria. ts is the timestamp of the +// criteria. It optionally uses the client cache, if provided. e is nil if +// the entry doesn't match the search criteria. ts is the timestamp of the // processed entry. func (l *queryLog) readNextEntry( - r *QLogReader, + r *qLogReader, params *searchParams, cache clientCache, ) (e *logEntry, ts int64, err error) { diff --git a/internal/querylog/searchparams.go b/internal/querylog/searchparams.go index f18ff561..a0a0ff6c 100644 --- a/internal/querylog/searchparams.go +++ b/internal/querylog/searchparams.go @@ -2,18 +2,25 @@ package querylog import "time" -// searchParams represent the search query sent by the client +// searchParams represent the search query sent by the client. type searchParams struct { - // searchCriteria - list of search criteria that we use to get filter results - searchCriteria []searchCriterion - - // olderThen - return entries that are older than this value - // if not set - disregard it and return any value + // olderThen represents a parameter for entries that are older than this + // parameter value. 
If not set, disregard it and return any value. olderThan time.Time - offset int // offset for the search - limit int // limit the number of records returned - maxFileScanEntries int // maximum log entries to scan in query log files. if 0 - no limit + // searchCriteria is a list of search criteria that we use to get filter + // results. + searchCriteria []searchCriterion + + // offset for the search. + offset int + + // limit the number of records returned. + limit int + + // maxFileScanEntries is a maximum of log entries to scan in query log + // files. If not set, then no limit. + maxFileScanEntries int } // newSearchParams - creates an empty instance of searchParams diff --git a/scripts/make/go-lint.sh b/scripts/make/go-lint.sh index aba2521f..acaca705 100644 --- a/scripts/make/go-lint.sh +++ b/scripts/make/go-lint.sh @@ -161,7 +161,6 @@ run_linter "$GO" vet ./... run_linter govulncheck ./... # Apply more lax standards to the code we haven't properly refactored yet. -run_linter gocyclo --over 13 ./internal/querylog run_linter gocyclo --over 12 ./internal/dhcpd # Apply the normal standards to new or somewhat refactored code. @@ -173,10 +172,11 @@ run_linter gocyclo --over 10\ ./internal/dnsforward/\ ./internal/filtering/\ ./internal/home/\ + ./internal/next/\ + ./internal/querylog/\ ./internal/stats/\ ./internal/tools/\ ./internal/updater/\ - ./internal/next/\ ./internal/version/\ ./scripts/blocked-services/\ ./scripts/vetted-filters/\