From 585d2b20bdc5dcdb76c696b4e0306ddd3ce19329 Mon Sep 17 00:00:00 2001 From: Christian Kamm Date: Tue, 19 Dec 2017 13:34:38 +0100 Subject: [PATCH] Exclude regex: Restore old matching on Windows #6245 Unfortunately matching behaved differently on Windows. This patch restores the previous matching behavior but still uses the new regular expression based matching. Further work will hopefully unify the behavior between platforms without breaking backwards compatibility. --- src/csync/csync_exclude.cpp | 71 ++++++++++-- src/csync/csync_exclude.h | 13 +++ .../csync/csync_tests/check_csync_exclude.cpp | 101 +++++++++++++++++- 3 files changed, 177 insertions(+), 8 deletions(-) diff --git a/src/csync/csync_exclude.cpp b/src/csync/csync_exclude.cpp index be3d80ae8..524daed27 100644 --- a/src/csync/csync_exclude.cpp +++ b/src/csync/csync_exclude.cpp @@ -241,6 +241,8 @@ using namespace OCC; ExcludedFiles::ExcludedFiles() { + // Windows used to use PathMatchSpec which allows *foo to match abc/deffoo. + _wildcardsMatchSlash = Utility::isWindows(); } ExcludedFiles::~ExcludedFiles() @@ -270,6 +272,12 @@ void ExcludedFiles::clearManualExcludes() reloadExcludeFiles(); } +void ExcludedFiles::setWildcardsMatchSlash(bool onoff) +{ + _wildcardsMatchSlash = onoff; + prepare(); +} + bool ExcludedFiles::reloadExcludeFiles() { _allExcludes.clear(); @@ -409,7 +417,12 @@ auto ExcludedFiles::csyncTraversalMatchFun() const return [this](const char *path, ItemType filetype) { return this->traversalPatternMatch(path, filetype); }; } -static QString convertToRegexpSyntax(QString exclude) +/** + * On linux we used to use fnmatch with FNM_PATHNAME, but the windows function we used + * didn't have that behavior. wildcardsMatchSlash can be used to control which behavior + * the resulting regex shall use. + */ +static QString convertToRegexpSyntax(QString exclude, bool wildcardsMatchSlash) { // Translate *, ?, [...] to their regex variants. // The escape sequences \*, \?, \[. \\ have a special meaning, @@ -433,11 +446,19 @@ static QString convertToRegexpSyntax(QString exclude) switch (exclude[i].unicode()) { case '*': flush(); - regex.append("[^/]*"); + if (wildcardsMatchSlash) { + regex.append(".*"); + } else { + regex.append("[^/]*"); + } break; case '?': flush(); - regex.append("[^/]"); + if (wildcardsMatchSlash) { + regex.append("."); + } else { + regex.append("[^/]"); + } break; case '[': { flush(); @@ -491,6 +512,42 @@ static QString convertToRegexpSyntax(QString exclude) return regex; } +static QString extractBnameTrigger(const QString &exclude, bool wildcardsMatchSlash) +{ + // We can definitely drop everything to the left of a / - that will never match + // any bname. + QString pattern = exclude.mid(exclude.lastIndexOf('/') + 1); + + // Easy case, nothing else can match a slash, so that's it. + if (!wildcardsMatchSlash) + return pattern; + + // Otherwise it's more complicated. Examples: + // - "foo*bar" can match "fooX/Xbar", pattern is "*bar" + // - "foo*bar*" can match "fooX/XbarX", pattern is "*bar*" + // - "foo?bar" can match "foo/bar" but also "fooXbar", pattern is "*bar" + + auto isWildcard = [](QChar c) { return c == QLatin1Char('*') || c == QLatin1Char('?'); }; + + // First, skip wildcards on the very right of the pattern + int i = pattern.size() - 1; + while (i >= 0 && isWildcard(pattern[i])) + --i; + + // Then scan further until the next wildcard that could match a / + while (i >= 0 && !isWildcard(pattern[i])) + --i; + + // Everything to the right is part of the pattern + pattern = pattern.mid(i + 1); + + // And if there was a wildcard, it starts with a * + if (i >= 0) + pattern.prepend('*'); + + return pattern; +} + void ExcludedFiles::prepare() { // Build regular expressions for the different cases. @@ -556,15 +613,15 @@ void ExcludedFiles::prepare() auto &fullFileDir = removeExcluded ? fullFileDirRemove : fullFileDirKeep; auto &fullDir = removeExcluded ? fullDirRemove : fullDirKeep; - auto regexExclude = convertToRegexpSyntax(QString::fromUtf8(exclude)); + auto regexExclude = convertToRegexpSyntax(QString::fromUtf8(exclude), _wildcardsMatchSlash); if (!fullPath) { regexAppend(bnameFileDir, bnameDir, regexExclude, matchDirOnly); } else { regexAppend(fullFileDir, fullDir, regexExclude, matchDirOnly); - // for activation, trigger on the 'bname' part of the full pattern - auto bnameExclude = exclude.mid(exclude.lastIndexOf('/') + 1); - auto regexBname = convertToRegexpSyntax(bnameExclude); + // For activation, trigger on the 'bname' part of the full pattern. + QString bnameExclude = extractBnameTrigger(exclude, _wildcardsMatchSlash); + auto regexBname = convertToRegexpSyntax(bnameExclude, true); regexAppend(bnameTriggerFileDir, bnameTriggerDir, regexBname, matchDirOnly); } } diff --git a/src/csync/csync_exclude.h b/src/csync/csync_exclude.h index 6f53e64a0..950a0f46f 100644 --- a/src/csync/csync_exclude.h +++ b/src/csync/csync_exclude.h @@ -109,6 +109,11 @@ public: */ void clearManualExcludes(); + /** + * Adjusts behavior of wildcards. Only used for testing. + */ + void setWildcardsMatchSlash(bool onoff); + /** * Generate a hook for traversal exclude pattern matching * that csync can use. @@ -200,6 +205,14 @@ private: bool _excludeConflictFiles = true; + /** + * Whether * and ? in patterns can match a / + * + * Unfortunately this was how matching was done on Windows so + * it continues to be enabled there. + */ + bool _wildcardsMatchSlash = false; + friend class ExcludedFilesTest; }; diff --git a/test/csync/csync_tests/check_csync_exclude.cpp b/test/csync/csync_tests/check_csync_exclude.cpp index 0062c2a39..41f3261d5 100644 --- a/test/csync/csync_tests/check_csync_exclude.cpp +++ b/test/csync/csync_tests/check_csync_exclude.cpp @@ -40,6 +40,7 @@ static int setup(void **state) { csync = new CSYNC("/tmp/check_csync1", new OCC::SyncJournalDb("")); excludedFiles = new ExcludedFiles; + excludedFiles->setWildcardsMatchSlash(false); csync->exclude_traversal_fn = excludedFiles->csyncTraversalMatchFun(); *state = csync; @@ -51,6 +52,7 @@ static int setup_init(void **state) { csync = new CSYNC("/tmp/check_csync1", new OCC::SyncJournalDb("")); excludedFiles = new ExcludedFiles; + excludedFiles->setWildcardsMatchSlash(false); csync->exclude_traversal_fn = excludedFiles->csyncTraversalMatchFun(); excludedFiles->addExcludeFilePath(EXCLUDE_LIST_FILE); @@ -429,7 +431,101 @@ static void check_csync_pathes(void **) assert_int_equal(check_dir_full("/excludepath/withsubdir/foo"), CSYNC_FILE_EXCLUDE_LIST); } -static void check_csync_is_windows_reserved_word(void **) { +static void check_csync_wildcards(void **) +{ + excludedFiles->addManualExclude("a/foo*bar"); + excludedFiles->addManualExclude("b/foo*bar*"); + excludedFiles->addManualExclude("c/foo?bar"); + excludedFiles->addManualExclude("d/foo?bar*"); + excludedFiles->addManualExclude("e/foo?bar?"); + excludedFiles->addManualExclude("g/bar*"); + excludedFiles->addManualExclude("h/bar?"); + + excludedFiles->setWildcardsMatchSlash(false); + + assert_int_equal(check_file_traversal("a/fooXYZbar"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("a/fooX/Zbar"), CSYNC_NOT_EXCLUDED); + + assert_int_equal(check_file_traversal("b/fooXYZbarABC"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("b/fooX/ZbarABC"), CSYNC_NOT_EXCLUDED); + + assert_int_equal(check_file_traversal("c/fooXbar"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("c/foo/bar"), CSYNC_NOT_EXCLUDED); + + assert_int_equal(check_file_traversal("d/fooXbarABC"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("d/foo/barABC"), CSYNC_NOT_EXCLUDED); + + assert_int_equal(check_file_traversal("e/fooXbarA"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("e/foo/barA"), CSYNC_NOT_EXCLUDED); + + assert_int_equal(check_file_traversal("g/barABC"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("g/XbarABC"), CSYNC_NOT_EXCLUDED); + + assert_int_equal(check_file_traversal("h/barZ"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("h/XbarZ"), CSYNC_NOT_EXCLUDED); + + excludedFiles->setWildcardsMatchSlash(true); + + assert_int_equal(check_file_traversal("a/fooX/Zbar"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("b/fooX/ZbarABC"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("c/foo/bar"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("d/foo/barABC"), CSYNC_FILE_EXCLUDE_LIST); + assert_int_equal(check_file_traversal("e/foo/barA"), CSYNC_FILE_EXCLUDE_LIST); +} + +static void check_csync_regex_translation(void **) +{ + QByteArray storage; + auto translate = [&storage](const char *pattern) { + storage = convertToRegexpSyntax(pattern, false).toUtf8(); + return storage.constData(); + }; + + assert_string_equal(translate(""), ""); + assert_string_equal(translate("abc"), "abc"); + assert_string_equal(translate("a*c"), "a[^/]*c"); + assert_string_equal(translate("a?c"), "a[^/]c"); + assert_string_equal(translate("a[xyz]c"), "a[xyz]c"); + assert_string_equal(translate("a[xyzc"), "a\\[xyzc"); + assert_string_equal(translate("a[!xyz]c"), "a[^xyz]c"); + assert_string_equal(translate("a\\*b\\?c\\[d\\\\e"), "a\\*b\\?c\\[d\\\\e"); + assert_string_equal(translate("a.c"), "a\\.c"); + assert_string_equal(translate("?𠜎?"), "[^/]\\𠜎[^/]"); // 𠜎 is 4-byte utf8 +} + +static void check_csync_bname_trigger(void **) +{ + bool wildcardsMatchSlash = false; + QByteArray storage; + auto translate = [&storage, &wildcardsMatchSlash](const char *pattern) { + storage = extractBnameTrigger(pattern, wildcardsMatchSlash).toUtf8(); + return storage.constData(); + }; + + assert_string_equal(translate(""), ""); + assert_string_equal(translate("a/b/"), ""); + assert_string_equal(translate("a/b/c"), "c"); + assert_string_equal(translate("c"), "c"); + assert_string_equal(translate("a/foo*"), "foo*"); + assert_string_equal(translate("a/abc*foo*"), "abc*foo*"); + + wildcardsMatchSlash = true; + + assert_string_equal(translate(""), ""); + assert_string_equal(translate("a/b/"), ""); + assert_string_equal(translate("a/b/c"), "c"); + assert_string_equal(translate("c"), "c"); + assert_string_equal(translate("*"), "*"); + assert_string_equal(translate("a/foo*"), "foo*"); + assert_string_equal(translate("a/abc?foo*"), "*foo*"); + assert_string_equal(translate("a/abc*foo*"), "*foo*"); + assert_string_equal(translate("a/abc?foo?"), "*foo?"); + assert_string_equal(translate("a/abc*foo?*"), "*foo?*"); + assert_string_equal(translate("a/abc*/foo*"), "foo*"); +} + +static void check_csync_is_windows_reserved_word(void **) +{ assert_true(csync_is_windows_reserved_word("CON")); assert_true(csync_is_windows_reserved_word("con")); assert_true(csync_is_windows_reserved_word("CON.")); @@ -537,6 +633,9 @@ int torture_run_tests(void) cmocka_unit_test_setup_teardown(T::check_csync_excluded_traversal, T::setup_init, T::teardown), cmocka_unit_test_setup_teardown(T::check_csync_dir_only, T::setup, T::teardown), cmocka_unit_test_setup_teardown(T::check_csync_pathes, T::setup_init, T::teardown), + cmocka_unit_test_setup_teardown(T::check_csync_wildcards, T::setup, T::teardown), + cmocka_unit_test_setup_teardown(T::check_csync_regex_translation, T::setup, T::teardown), + cmocka_unit_test_setup_teardown(T::check_csync_bname_trigger, T::setup, T::teardown), cmocka_unit_test_setup_teardown(T::check_csync_is_windows_reserved_word, T::setup_init, T::teardown), cmocka_unit_test_setup_teardown(T::check_csync_excluded_performance, T::setup_init, T::teardown), cmocka_unit_test(T::check_csync_exclude_expand_escapes),