Local discovery: Use db instead of filesystem

We mostly trust the file watchers meaning that we don't re-scan the
local tree if we have done that recently and no file watcher events
have arrived. If the file watchers invalidate a subtree, we rescan
only that subtree.

Since we're not entirely sure the file watchers are reliable, we still
do full local discoveries regularly (1h by default). There is a config
file setting as well as an environment variable to control the interval.
This commit is contained in:
Christian Kamm 2017-09-14 16:43:23 +02:00 committed by ckamm
parent 66f0ce6616
commit e85a339d94
13 changed files with 292 additions and 40 deletions

View file

@ -27,6 +27,8 @@ Some interesting values that can be set on the configuration file are:
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``forceSyncInterval`` | ``7200000`` | The duration of no activity after which a synchronization run shall be triggered automatically. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``fullLocalDiscoveryInterval`` | ``3600000`` | The interval after which the next synchronization will perform a full local discovery. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``notificationRefreshInterval`` | ``300000`` | Specifies the default interval of checking for new server notifications in milliseconds. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+

View file

@ -547,13 +547,28 @@ bool SyncJournalDb::checkConnect()
return sqlFail("prepare _getFileRecordQueryByFileId", *_getFileRecordQueryByFileId);
}
// This query is used to skip discovery and fill the tree from the
// database instead
_getFilesBelowPathQuery.reset(new SqlQuery(_db));
if (_getFilesBelowPathQuery->prepare(
GET_FILE_RECORD_QUERY
" WHERE path > (?1||'/') AND path < (?1||'0') ORDER BY path||'/' ASC")) {
" WHERE path > (?1||'/') AND path < (?1||'0')"
// We want to ensure that the contents of a directory are sorted
// directly behind the directory itself. Without this ORDER BY
// an ordering like foo, foo-2, foo/file would be returned.
// With the trailing /, we get foo-2, foo, foo/file. This property
// is used in fill_tree_from_db().
" ORDER BY path||'/' ASC")) {
return sqlFail("prepare _getFilesBelowPathQuery", *_getFilesBelowPathQuery);
}
_getAllFilesQuery.reset(new SqlQuery(_db));
if (_getAllFilesQuery->prepare(
GET_FILE_RECORD_QUERY
" ORDER BY path||'/' ASC")) {
return sqlFail("prepare _getAllFilesQuery", *_getAllFilesQuery);
}
_setFileRecordQuery.reset(new SqlQuery(_db));
if (_setFileRecordQuery->prepare("INSERT OR REPLACE INTO metadata "
"(phash, pathlen, path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, filesize, ignoredChildrenRemote, contentChecksum, contentChecksumTypeId) "
@ -704,6 +719,7 @@ void SyncJournalDb::close()
_getFileRecordQueryByInode.reset(0);
_getFileRecordQueryByFileId.reset(0);
_getFilesBelowPathQuery.reset(0);
_getAllFilesQuery.reset(0);
_setFileRecordQuery.reset(0);
_setFileRecordChecksumQuery.reset(0);
_setFileRecordLocalMetadataQuery.reset(0);
@ -1133,16 +1149,23 @@ bool SyncJournalDb::getFilesBelowPath(const QByteArray &path, const std::functio
if (!checkConnect())
return false;
_getFilesBelowPathQuery->reset_and_clear_bindings();
_getFilesBelowPathQuery->bindValue(1, path);
// Since the path column doesn't store the starting /, the getFilesBelowPathQuery
// can't be used for the root path "". It would scan for (path > '/' and path < '0')
// and find nothing. So, unfortunately, we have to use a different query for
// retrieving the whole tree.
auto &query = path.isEmpty() ? _getAllFilesQuery : _getFilesBelowPathQuery;
if (!_getFilesBelowPathQuery->exec()) {
query->reset_and_clear_bindings();
if (query == _getFilesBelowPathQuery)
query->bindValue(1, path);
if (!query->exec()) {
return false;
}
while (_getFilesBelowPathQuery->next()) {
while (query->next()) {
SyncJournalFileRecord rec;
fillFileRecordFromGetQuery(rec, *_getFilesBelowPathQuery);
fillFileRecordFromGetQuery(rec, *query);
rowCallback(rec);
}

View file

@ -244,6 +244,7 @@ private:
QScopedPointer<SqlQuery> _getFileRecordQueryByInode;
QScopedPointer<SqlQuery> _getFileRecordQueryByFileId;
QScopedPointer<SqlQuery> _getFilesBelowPathQuery;
QScopedPointer<SqlQuery> _getAllFilesQuery;
QScopedPointer<SqlQuery> _setFileRecordQuery;
QScopedPointer<SqlQuery> _setFileRecordChecksumQuery;
QScopedPointer<SqlQuery> _setFileRecordLocalMetadataQuery;

View file

@ -314,6 +314,9 @@ int csync_s::reinitialize() {
local.files.clear();
remote.files.clear();
local_discovery_style = LocalDiscoveryStyle::FilesystemOnly;
locally_touched_dirs.clear();
status = CSYNC_STATUS_INIT;
SAFE_FREE(error_string);

View file

@ -38,6 +38,7 @@
#include <stdbool.h>
#include <sqlite3.h>
#include <map>
#include <set>
#include "common/syncjournaldb.h"
#include "config_csync.h"
@ -70,6 +71,11 @@ enum csync_replica_e {
REMOTE_REPLICA
};
enum class LocalDiscoveryStyle {
FilesystemOnly, //< read all local data from the filesystem
DatabaseAndFilesystem, //< read from the db, except for listed paths
};
/*
* This is a structurere similar to QStringRef
@ -190,6 +196,16 @@ struct OCSYNC_EXPORT csync_s {
*/
bool read_remote_from_db = false;
LocalDiscoveryStyle local_discovery_style = LocalDiscoveryStyle::FilesystemOnly;
/**
* List of folder-relative directory paths that should be scanned on the
* filesystem if the local_discovery_style suggests it.
*
* Their parents will be scanned too. The paths don't start with a /.
*/
std::set<QByteArray> locally_touched_dirs;
bool ignore_hidden_files = true;
csync_s(const char *localUri, OCC::SyncJournalDb *statedb);

View file

@ -435,15 +435,17 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
{
int64_t count = 0;
QByteArray skipbase;
auto rowCallback = [ctx, &count, &skipbase](const OCC::SyncJournalFileRecord &rec) {
auto &files = ctx->current == LOCAL_REPLICA ? ctx->local.files : ctx->remote.files;
auto rowCallback = [ctx, &count, &skipbase, &files](const OCC::SyncJournalFileRecord &rec) {
if (ctx->current == REMOTE_REPLICA) {
/* When selective sync is used, the database may have subtrees with a parent
* whose etag (md5) is _invalid_. These are ignored and shall not appear in the
* whose etag is _invalid_. These are ignored and shall not appear in the
* remote tree.
* Sometimes folders that are not ignored by selective sync get marked as
* _invalid_, but that is not a problem as the next discovery will retrieve
* their correct etags again and we don't run into this case.
*/
if( rec._etag == "_invalid_") {
if (rec._etag == "_invalid_") {
qCDebug(lcUpdate, "%s selective sync excluded", rec._path.constData());
skipbase = rec._path;
skipbase += '/';
@ -452,12 +454,13 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
/* Skip over all entries with the same base path. Note that this depends
* strongly on the ordering of the retrieved items. */
if( !skipbase.isEmpty() && rec._path.startsWith(skipbase) ) {
if (!skipbase.isEmpty() && rec._path.startsWith(skipbase)) {
qCDebug(lcUpdate, "%s selective sync excluded because the parent is", rec._path.constData());
return;
} else {
skipbase.clear();
}
}
std::unique_ptr<csync_file_stat_t> st = csync_file_stat_t::fromSyncJournalFileRecord(rec);
@ -477,7 +480,7 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
}
/* store into result list. */
ctx->remote.files[rec._path] = std::move(st);
files[rec._path] = std::move(st);
++count;
};
@ -522,6 +525,26 @@ int csync_ftw(CSYNC *ctx, const char *uri, csync_walker_fn fn,
int rc = 0;
bool do_read_from_db = (ctx->current == REMOTE_REPLICA && ctx->remote.read_from_db);
const char *db_uri = uri;
if (ctx->current == LOCAL_REPLICA
&& ctx->local_discovery_style == LocalDiscoveryStyle::DatabaseAndFilesystem) {
const char *local_uri = uri + strlen(ctx->local.uri);
if (*local_uri == '/')
++local_uri;
db_uri = local_uri;
do_read_from_db = true;
// Minor bug: local_uri doesn't have a trailing /. Example: Assume it's "d/foo"
// and we want to check whether we should read from the db. Assume "d/foo a" is
// in locally_touched_dirs. Then this check will say no, don't read from the db!
// (because "d/foo" < "d/foo a" < "d/foo/bar")
// C++14: Could skip the conversion to QByteArray here.
auto it = ctx->locally_touched_dirs.lower_bound(QByteArray(local_uri));
if (it != ctx->locally_touched_dirs.end() && it->startsWith(local_uri)) {
do_read_from_db = false;
}
}
if (!depth) {
mark_current_item_ignored(ctx, previous_fs, CSYNC_STATUS_INDIVIDUAL_TOO_DEEP);
@ -533,7 +556,7 @@ int csync_ftw(CSYNC *ctx, const char *uri, csync_walker_fn fn,
// if the etag of this dir is still the same, its content is restored from the
// database.
if( do_read_from_db ) {
if( ! fill_tree_from_db(ctx, uri) ) {
if( ! fill_tree_from_db(ctx, db_uri) ) {
errno = ENOENT;
ctx->status_code = CSYNC_STATUS_OPENDIR_ERROR;
goto error;

View file

@ -448,12 +448,26 @@ int Folder::slotWipeErrorBlacklist()
void Folder::slotWatchedPathChanged(const QString &path)
{
if (!path.startsWith(this->path())) {
qCDebug(lcFolder) << "Changed path is not contained in folder, ignoring:" << path;
return;
}
auto relativePath = path.midRef(this->path().size());
// Add to list of locally modified paths
//
// We do this before checking for our own sync-related changes to make
// extra sure to not miss relevant changes.
auto relativePathBytes = relativePath.toUtf8();
_localDiscoveryPaths.insert(relativePathBytes);
qCDebug(lcFolder) << "local discovery: inserted" << relativePath << "due to file watcher";
// The folder watcher fires a lot of bogus notifications during
// a sync operation, both for actual user files and the database
// and log. Therefore we check notifications against operations
// the sync is doing to filter out our own changes.
#ifdef Q_OS_MAC
Q_UNUSED(path)
// On OSX the folder watcher does not report changes done by our
// own process. Therefore nothing needs to be done here!
#else
@ -465,16 +479,13 @@ void Folder::slotWatchedPathChanged(const QString &path)
#endif
// Check that the mtime actually changed.
if (path.startsWith(this->path())) {
auto relativePath = path.mid(this->path().size());
SyncJournalFileRecord record;
if (_journal.getFileRecord(relativePath, &record)
if (_journal.getFileRecord(relativePathBytes, &record)
&& record.isValid()
&& !FileSystem::fileChanged(path, record._fileSize, record._modtime)) {
qCInfo(lcFolder) << "Ignoring spurious notification for file" << relativePath;
return; // probably a spurious notification
}
}
emit watchedFileChangedExternally(path);
@ -645,6 +656,36 @@ void Folder::startSync(const QStringList &pathList)
setDirtyNetworkLimits();
setSyncOptions();
static qint64 fullLocalDiscoveryInterval = []() {
auto interval = ConfigFile().fullLocalDiscoveryInterval();
QByteArray env = qgetenv("OWNCLOUD_FULL_LOCAL_DISCOVERY_INTERVAL");
if (!env.isEmpty()) {
interval = env.toLongLong();
}
return interval;
}();
if (_folderWatcher && _folderWatcher->isReliable()
&& _timeSinceLastFullLocalDiscovery.isValid()
&& (fullLocalDiscoveryInterval < 0
|| _timeSinceLastFullLocalDiscovery.elapsed() < fullLocalDiscoveryInterval)) {
qCInfo(lcFolder) << "Allowing local discovery to read from the database";
_engine->setLocalDiscoveryOptions(LocalDiscoveryStyle::DatabaseAndFilesystem, _localDiscoveryPaths);
if (lcFolder().isDebugEnabled()) {
QByteArrayList paths;
for (auto &path : _localDiscoveryPaths)
paths.append(path);
qCDebug(lcFolder) << "local discovery paths: " << paths;
}
_previousLocalDiscoveryPaths = std::move(_localDiscoveryPaths);
} else {
qCInfo(lcFolder) << "Forbidding local discovery to read from the database";
_engine->setLocalDiscoveryOptions(LocalDiscoveryStyle::FilesystemOnly);
_previousLocalDiscoveryPaths.clear();
}
_localDiscoveryPaths.clear();
_engine->setIgnoreHiddenFiles(_definition.ignoreHiddenFiles);
QMetaObject::invokeMethod(_engine.data(), "startSync", Qt::QueuedConnection);
@ -783,6 +824,24 @@ void Folder::slotSyncFinished(bool success)
journalDb()->setSelectiveSyncList(SyncJournalDb::SelectiveSyncWhiteList, QStringList());
}
// bug: This function uses many different criteria for "sync was successful" - investigate!
if ((_syncResult.status() == SyncResult::Success
|| _syncResult.status() == SyncResult::Problem)
&& success) {
if (_engine->lastLocalDiscoveryStyle() == LocalDiscoveryStyle::FilesystemOnly) {
_timeSinceLastFullLocalDiscovery.start();
}
qCDebug(lcFolder) << "Sync success, forgetting last sync's local discovery path list";
} else {
// On overall-failure we can't forget about last sync's local discovery
// paths yet, reuse them for the next sync again.
// C++17: Could use std::set::merge().
_localDiscoveryPaths.insert(
_previousLocalDiscoveryPaths.begin(), _previousLocalDiscoveryPaths.end());
qCDebug(lcFolder) << "Sync failed, keeping last sync's local discovery path list";
}
_previousLocalDiscoveryPaths.clear();
emit syncStateChange();
// The syncFinished result that is to be triggered here makes the folderman
@ -851,6 +910,25 @@ void Folder::slotItemCompleted(const SyncFileItemPtr &item)
_folderWatcher->removePath(path() + item->_file);
}
// Success and failure of sync items adjust what the next sync is
// supposed to do.
//
// For successes, we want to wipe the file from the list to ensure we don't
// rediscover it even if this overall sync fails.
//
// For failures, we want to add the file to the list so the next sync
// will be able to retry it.
if (item->_status == SyncFileItem::Success
|| item->_status == SyncFileItem::FileIgnored
|| item->_status == SyncFileItem::Restoration
|| item->_status == SyncFileItem::Conflict) {
if (_previousLocalDiscoveryPaths.erase(item->_file.toUtf8()))
qCDebug(lcFolder) << "local discovery: wiped" << item->_file;
} else {
_localDiscoveryPaths.insert(item->_file.toUtf8());
qCDebug(lcFolder) << "local discovery: inserted" << item->_file << "due to sync failure";
}
_syncResult.processCompletedItem(item);
_fileLog->logItem(*item);
@ -903,6 +981,11 @@ void Folder::slotScheduleThisFolder()
FolderMan::instance()->scheduleFolder(this);
}
void Folder::slotNextSyncFullLocalDiscovery()
{
_timeSinceLastFullLocalDiscovery.invalidate();
}
void Folder::scheduleThisFolderSoon()
{
if (!_scheduleSelfTimer.isActive()) {
@ -925,6 +1008,8 @@ void Folder::registerFolderWatcher()
_folderWatcher.reset(new FolderWatcher(path(), this));
connect(_folderWatcher.data(), &FolderWatcher::pathChanged,
this, &Folder::slotWatchedPathChanged);
connect(_folderWatcher.data(), &FolderWatcher::lostChanges,
this, &Folder::slotNextSyncFullLocalDiscovery);
}
void Folder::slotAboutToRemoveAllFiles(SyncFileItem::Direction dir, bool *cancel)

View file

@ -27,6 +27,7 @@
#include <QObject>
#include <QStringList>
#include <set>
class QThread;
class QSettings;
@ -312,6 +313,9 @@ private slots:
*/
void slotScheduleThisFolder();
/** Ensures that the next sync performs a full local discovery. */
void slotNextSyncFullLocalDiscovery();
private:
bool setIgnoredFiles();
@ -346,6 +350,7 @@ private:
QString _lastEtag;
QElapsedTimer _timeSinceLastSyncDone;
QElapsedTimer _timeSinceLastSyncStart;
QElapsedTimer _timeSinceLastFullLocalDiscovery;
qint64 _lastSyncDuration;
/// The number of syncs that failed in a row.
@ -380,6 +385,22 @@ private:
* Created by registerFolderWatcher(), triggers slotWatchedPathChanged()
*/
QScopedPointer<FolderWatcher> _folderWatcher;
/**
* The paths that should be checked by the next local discovery.
*
* Mostly a collection of files the filewatchers have reported as touched.
* Also includes files that have had errors in the last sync run.
*/
std::set<QByteArray> _localDiscoveryPaths;
/**
* The paths that the current sync run used for local discovery.
*
* For failing syncs, this list will be merged into _localDiscoveryPaths
* again when the sync is done to make sure everything is retried.
*/
std::set<QByteArray> _previousLocalDiscoveryPaths;
};
}

View file

@ -36,6 +36,7 @@
#include <QNetworkProxy>
#define DEFAULT_REMOTE_POLL_INTERVAL 30000 // default remote poll time in milliseconds
#define DEFAULT_FULL_LOCAL_DISCOVERY_INTERVAL (60 * 60 * 1000) // 1 hour
#define DEFAULT_MAX_LOG_LINES 20000
namespace OCC {
@ -45,6 +46,7 @@ Q_LOGGING_CATEGORY(lcConfigFile, "sync.configfile", QtInfoMsg)
//static const char caCertsKeyC[] = "CaCertificates"; only used from account.cpp
static const char remotePollIntervalC[] = "remotePollInterval";
static const char forceSyncIntervalC[] = "forceSyncInterval";
static const char fullLocalDiscoveryIntervalC[] = "fullLocalDiscoveryInterval";
static const char notificationRefreshIntervalC[] = "notificationRefreshInterval";
static const char monoIconsC[] = "monoIcons";
static const char promptDeleteC[] = "promptDeleteAllFiles";
@ -406,6 +408,13 @@ quint64 ConfigFile::forceSyncInterval(const QString &connection) const
return interval;
}
qint64 ConfigFile::fullLocalDiscoveryInterval() const
{
QSettings settings(configFile(), QSettings::IniFormat);
settings.beginGroup(defaultConnection());
return settings.value(QLatin1String(fullLocalDiscoveryIntervalC), DEFAULT_FULL_LOCAL_DISCOVERY_INTERVAL).toLongLong();
}
quint64 ConfigFile::notificationRefreshInterval(const QString &connection) const
{
QString con(connection);

View file

@ -72,6 +72,13 @@ public:
/* Force sync interval, in milliseconds */
quint64 forceSyncInterval(const QString &connection = QString()) const;
/**
* Interval in milliseconds within which full local discovery is required
*
* Use -1 to disable regular full local discoveries.
*/
qint64 fullLocalDiscoveryInterval() const;
bool monoIcons() const;
void setMonoIcons(bool);

View file

@ -813,6 +813,7 @@ void SyncEngine::startSync()
}
_csync_ctx->read_remote_from_db = true;
_lastLocalDiscoveryStyle = _csync_ctx->local_discovery_style;
bool ok;
auto selectiveSyncBlackList = _journal->getSelectiveSyncList(SyncJournalDb::SelectiveSyncBlackList, &ok);
@ -1555,6 +1556,12 @@ AccountPtr SyncEngine::account() const
return _account;
}
void SyncEngine::setLocalDiscoveryOptions(LocalDiscoveryStyle style, std::set<QByteArray> dirs)
{
_csync_ctx->local_discovery_style = style;
_csync_ctx->locally_touched_dirs = std::move(dirs);
}
void SyncEngine::abort()
{
if (_propagator)

View file

@ -25,6 +25,7 @@
#include <QMap>
#include <QStringList>
#include <QSharedPointer>
#include <set>
#include <csync.h>
@ -92,6 +93,7 @@ public:
AccountPtr account() const;
SyncJournalDb *journal() const { return _journal; }
QString localPath() const { return _localPath; }
/**
* Minimum age, in milisecond, of a file that can be uploaded.
* Files more recent than that are not going to be uploaeded as they are considered
@ -99,6 +101,22 @@ public:
*/
static qint64 minimumFileAgeForUpload; // in ms
/**
* Control whether local discovery should read from filesystem or db.
*
* If style is Partial, the paths is a set of file paths relative to
* the synced folder. All the parent directories of these paths will not
* be read from the db and scanned on the filesystem.
*
* Note, the style and paths are only retained for the next sync and
* revert afterwards. Use _lastLocalDiscoveryStyle to discover the last
* sync's style.
*/
void setLocalDiscoveryOptions(LocalDiscoveryStyle style, std::set<QByteArray> dirs = {});
/** Access the last sync run's local discovery style */
LocalDiscoveryStyle lastLocalDiscoveryStyle() const { return _lastLocalDiscoveryStyle; }
signals:
void csyncUnavailable();
@ -272,6 +290,9 @@ private:
/** List of unique errors that occurred in a sync run. */
QSet<QString> _uniqueErrors;
/** The kind of local discovery the last sync run used */
LocalDiscoveryStyle _lastLocalDiscoveryStyle = LocalDiscoveryStyle::DatabaseAndFilesystem;
};
}

View file

@ -613,6 +613,40 @@ private slots:
QCOMPARE(nDELETE, 5);
QCOMPARE(fakeFolder.currentLocalState(), remoteInfo);
}
// Check correct behavior when local discovery is partially drawn from the db
void testLocalDiscoveryStyle()
{
FakeFolder fakeFolder{ FileInfo::A12_B12_C12_S12() };
// More subdirectories are useful for testing
fakeFolder.localModifier().mkdir("A/X");
fakeFolder.localModifier().mkdir("A/Y");
fakeFolder.localModifier().insert("A/X/x1");
fakeFolder.localModifier().insert("A/Y/y1");
QVERIFY(fakeFolder.syncOnce());
QCOMPARE(fakeFolder.currentLocalState(), fakeFolder.currentRemoteState());
// Test begins
fakeFolder.localModifier().insert("A/a3");
fakeFolder.localModifier().insert("A/X/x2");
fakeFolder.localModifier().insert("A/Y/y2");
fakeFolder.localModifier().insert("B/b3");
fakeFolder.remoteModifier().insert("C/c3");
fakeFolder.syncEngine().setLocalDiscoveryOptions(LocalDiscoveryStyle::DatabaseAndFilesystem, { "A/X" });
QVERIFY(fakeFolder.syncOnce());
QVERIFY(fakeFolder.currentRemoteState().find("A/a3"));
QVERIFY(fakeFolder.currentRemoteState().find("A/X/x2"));
QVERIFY(!fakeFolder.currentRemoteState().find("A/Y/y2"));
QVERIFY(!fakeFolder.currentRemoteState().find("B/b3"));
QVERIFY(fakeFolder.currentLocalState().find("C/c3"));
QCOMPARE(fakeFolder.syncEngine().lastLocalDiscoveryStyle(), LocalDiscoveryStyle::DatabaseAndFilesystem);
QVERIFY(fakeFolder.syncOnce());
QCOMPARE(fakeFolder.currentLocalState(), fakeFolder.currentRemoteState());
QCOMPARE(fakeFolder.syncEngine().lastLocalDiscoveryStyle(), LocalDiscoveryStyle::FilesystemOnly);
}
};
QTEST_GUILESS_MAIN(TestSyncEngine)