From 58eaf9940a90e5dfcf3282af078c83a6be1c38b1 Mon Sep 17 00:00:00 2001 From: Olivier Goffart Date: Sat, 20 Oct 2018 13:24:31 +0200 Subject: [PATCH] Database: Add an index on the parent path So we can quickly query the items in a parent directory This uses a custom slite3 function, which means that when downgrading the client, or using another tool to add entries in the database, any insertion in the metadata table will produce an error: "unknown function: parent_hash()" (This will crash the client 2.5) --- src/common/syncjournaldb.cpp | 58 ++++++++++++++++++++++++++++++++---- src/common/syncjournaldb.h | 2 ++ src/libsync/discovery.cpp | 5 +--- 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/src/common/syncjournaldb.cpp b/src/common/syncjournaldb.cpp index c745e7939..ed28a5e87 100644 --- a/src/common/syncjournaldb.cpp +++ b/src/common/syncjournaldb.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include "common/syncjournaldb.h" #include "version.h" @@ -342,6 +343,15 @@ bool SyncJournalDb::checkConnect() return sqlFail("Set PRAGMA case_sensitivity", pragma1); } + sqlite3_create_function(_db.sqliteDb(), "parent_hash", 1, SQLITE_UTF8 | SQLITE_DETERMINISTIC, nullptr, + [] (sqlite3_context *ctx,int, sqlite3_value **argv) { + auto text = reinterpret_cast(sqlite3_value_text(argv[0])); + const char *end = std::strrchr(text, '/'); + if (!end) end = text; + sqlite3_result_int64(ctx, c_jhash64(reinterpret_cast(text), + end - text, 0)); + }, nullptr, nullptr); + /* Because insert is so slow, we do everything in a transaction, and only need one call to commit */ startTransaction(); @@ -682,6 +692,16 @@ bool SyncJournalDb::updateMetadataTableStructure() commitInternal("update database structure: add path index"); } + if (1) { + SqlQuery query(_db); + query.prepare("CREATE INDEX IF NOT EXISTS metadata_parent ON metadata(parent_hash(path));"); + if (!query.exec()) { + sqlFail("updateMetadataTableStructure: create index parent", query); + re = false; + } + commitInternal("update database structure: add parent index"); + } + if (columns.indexOf("ignoredChildrenRemote") == -1) { SqlQuery query(_db); query.prepare("ALTER TABLE metadata ADD COLUMN ignoredChildrenRemote INT;"); @@ -847,11 +867,6 @@ QVector SyncJournalDb::tableColumns(const QByteArray &table) qint64 SyncJournalDb::getPHash(const QByteArray &file) { int64_t h = 0; - - if (file.isEmpty()) { - return -1; - } - int len = file.length(); h = c_jhash64((uint8_t *)file.data(), len, 0); @@ -1161,6 +1176,39 @@ bool SyncJournalDb::getFilesBelowPath(const QByteArray &path, const std::functio return true; } +bool SyncJournalDb::listFilesInPath(const QByteArray& path, + const std::function& rowCallback) +{ + QMutexLocker locker(&_mutex); + + if (_metadataTableIsEmpty) + return true; + + if (!checkConnect()) + return false; + + if (!_listFilesInPathQuery.initOrReset(QByteArrayLiteral( + GET_FILE_RECORD_QUERY " WHERE parent_hash(path) = ?1 ORDER BY path||'/' ASC"), _db)) + return false; + + _listFilesInPathQuery.bindValue(1, getPHash(path)); + + if (!_listFilesInPathQuery.exec()) + return false; + + while (_listFilesInPathQuery.next()) { + SyncJournalFileRecord rec; + fillFileRecordFromGetQuery(rec, _listFilesInPathQuery); + if (!rec._path.startsWith(path) || rec._path.indexOf("/", path.size() + 1) > 0) { + qWarning(lcDb) << "hash collision " << path << rec._path; + continue; + } + rowCallback(rec); + } + + return true; +} + int SyncJournalDb::getFileRecordCount() { QMutexLocker locker(&_mutex); diff --git a/src/common/syncjournaldb.h b/src/common/syncjournaldb.h index 31e757a94..8898eefe2 100644 --- a/src/common/syncjournaldb.h +++ b/src/common/syncjournaldb.h @@ -61,6 +61,7 @@ public: bool getFileRecordByInode(quint64 inode, SyncJournalFileRecord *rec); bool getFileRecordsByFileId(const QByteArray &fileId, const std::function &rowCallback); bool getFilesBelowPath(const QByteArray &path, const std::function &rowCallback); + bool listFilesInPath(const QByteArray &path, const std::function &rowCallback); bool setFileRecord(const SyncJournalFileRecord &record); /// Like setFileRecord, but preserves checksums @@ -276,6 +277,7 @@ private: SqlQuery _getFileRecordQueryByFileId; SqlQuery _getFilesBelowPathQuery; SqlQuery _getAllFilesQuery; + SqlQuery _listFilesInPathQuery; SqlQuery _setFileRecordQuery; SqlQuery _setFileRecordChecksumQuery; SqlQuery _setFileRecordLocalMetadataQuery; diff --git a/src/libsync/discovery.cpp b/src/libsync/discovery.cpp index cf8cf91ae..fc273b479 100644 --- a/src/libsync/discovery.cpp +++ b/src/libsync/discovery.cpp @@ -98,10 +98,7 @@ void ProcessDirectoryJob::process() // fetch all the name from the DB auto pathU8 = _currentFolder._original.toUtf8(); - // FIXME do that better (a query that do not get stuff recursively ?) - if (!_discoveryData->_statedb->getFilesBelowPath(pathU8, [&](const SyncJournalFileRecord &rec) { - if (rec._path.indexOf("/", pathU8.size() + 1) > 0) - return; + if (!_discoveryData->_statedb->listFilesInPath(pathU8, [&](const SyncJournalFileRecord &rec) { auto name = pathU8.isEmpty() ? rec._path : QString::fromUtf8(rec._path.mid(pathU8.size() + 1)); if (rec._type == ItemTypeVirtualFile || rec._type == ItemTypeVirtualFileDownload) { name.chop(_discoveryData->_syncOptions._virtualFileSuffix.size());