Exclude regex: Restore old matching on Windows #6245

Unfortunately matching behaved differently on Windows. This patch
restores the previous matching behavior but still uses the new regular
expression based matching.

Further work will hopefully unify the behavior between platforms without
breaking backwards compatibility.
This commit is contained in:
Christian Kamm 2017-12-19 13:34:38 +01:00 committed by ckamm
parent d0e7f61db6
commit 585d2b20bd
3 changed files with 177 additions and 8 deletions

View file

@ -241,6 +241,8 @@ using namespace OCC;
ExcludedFiles::ExcludedFiles()
{
// Windows used to use PathMatchSpec which allows *foo to match abc/deffoo.
_wildcardsMatchSlash = Utility::isWindows();
}
ExcludedFiles::~ExcludedFiles()
@ -270,6 +272,12 @@ void ExcludedFiles::clearManualExcludes()
reloadExcludeFiles();
}
void ExcludedFiles::setWildcardsMatchSlash(bool onoff)
{
_wildcardsMatchSlash = onoff;
prepare();
}
bool ExcludedFiles::reloadExcludeFiles()
{
_allExcludes.clear();
@ -409,7 +417,12 @@ auto ExcludedFiles::csyncTraversalMatchFun() const
return [this](const char *path, ItemType filetype) { return this->traversalPatternMatch(path, filetype); };
}
static QString convertToRegexpSyntax(QString exclude)
/**
* On linux we used to use fnmatch with FNM_PATHNAME, but the windows function we used
* didn't have that behavior. wildcardsMatchSlash can be used to control which behavior
* the resulting regex shall use.
*/
static QString convertToRegexpSyntax(QString exclude, bool wildcardsMatchSlash)
{
// Translate *, ?, [...] to their regex variants.
// The escape sequences \*, \?, \[. \\ have a special meaning,
@ -433,11 +446,19 @@ static QString convertToRegexpSyntax(QString exclude)
switch (exclude[i].unicode()) {
case '*':
flush();
regex.append("[^/]*");
if (wildcardsMatchSlash) {
regex.append(".*");
} else {
regex.append("[^/]*");
}
break;
case '?':
flush();
regex.append("[^/]");
if (wildcardsMatchSlash) {
regex.append(".");
} else {
regex.append("[^/]");
}
break;
case '[': {
flush();
@ -491,6 +512,42 @@ static QString convertToRegexpSyntax(QString exclude)
return regex;
}
static QString extractBnameTrigger(const QString &exclude, bool wildcardsMatchSlash)
{
// We can definitely drop everything to the left of a / - that will never match
// any bname.
QString pattern = exclude.mid(exclude.lastIndexOf('/') + 1);
// Easy case, nothing else can match a slash, so that's it.
if (!wildcardsMatchSlash)
return pattern;
// Otherwise it's more complicated. Examples:
// - "foo*bar" can match "fooX/Xbar", pattern is "*bar"
// - "foo*bar*" can match "fooX/XbarX", pattern is "*bar*"
// - "foo?bar" can match "foo/bar" but also "fooXbar", pattern is "*bar"
auto isWildcard = [](QChar c) { return c == QLatin1Char('*') || c == QLatin1Char('?'); };
// First, skip wildcards on the very right of the pattern
int i = pattern.size() - 1;
while (i >= 0 && isWildcard(pattern[i]))
--i;
// Then scan further until the next wildcard that could match a /
while (i >= 0 && !isWildcard(pattern[i]))
--i;
// Everything to the right is part of the pattern
pattern = pattern.mid(i + 1);
// And if there was a wildcard, it starts with a *
if (i >= 0)
pattern.prepend('*');
return pattern;
}
void ExcludedFiles::prepare()
{
// Build regular expressions for the different cases.
@ -556,15 +613,15 @@ void ExcludedFiles::prepare()
auto &fullFileDir = removeExcluded ? fullFileDirRemove : fullFileDirKeep;
auto &fullDir = removeExcluded ? fullDirRemove : fullDirKeep;
auto regexExclude = convertToRegexpSyntax(QString::fromUtf8(exclude));
auto regexExclude = convertToRegexpSyntax(QString::fromUtf8(exclude), _wildcardsMatchSlash);
if (!fullPath) {
regexAppend(bnameFileDir, bnameDir, regexExclude, matchDirOnly);
} else {
regexAppend(fullFileDir, fullDir, regexExclude, matchDirOnly);
// for activation, trigger on the 'bname' part of the full pattern
auto bnameExclude = exclude.mid(exclude.lastIndexOf('/') + 1);
auto regexBname = convertToRegexpSyntax(bnameExclude);
// For activation, trigger on the 'bname' part of the full pattern.
QString bnameExclude = extractBnameTrigger(exclude, _wildcardsMatchSlash);
auto regexBname = convertToRegexpSyntax(bnameExclude, true);
regexAppend(bnameTriggerFileDir, bnameTriggerDir, regexBname, matchDirOnly);
}
}

View file

@ -109,6 +109,11 @@ public:
*/
void clearManualExcludes();
/**
* Adjusts behavior of wildcards. Only used for testing.
*/
void setWildcardsMatchSlash(bool onoff);
/**
* Generate a hook for traversal exclude pattern matching
* that csync can use.
@ -200,6 +205,14 @@ private:
bool _excludeConflictFiles = true;
/**
* Whether * and ? in patterns can match a /
*
* Unfortunately this was how matching was done on Windows so
* it continues to be enabled there.
*/
bool _wildcardsMatchSlash = false;
friend class ExcludedFilesTest;
};

View file

@ -40,6 +40,7 @@ static int setup(void **state) {
csync = new CSYNC("/tmp/check_csync1", new OCC::SyncJournalDb(""));
excludedFiles = new ExcludedFiles;
excludedFiles->setWildcardsMatchSlash(false);
csync->exclude_traversal_fn = excludedFiles->csyncTraversalMatchFun();
*state = csync;
@ -51,6 +52,7 @@ static int setup_init(void **state) {
csync = new CSYNC("/tmp/check_csync1", new OCC::SyncJournalDb(""));
excludedFiles = new ExcludedFiles;
excludedFiles->setWildcardsMatchSlash(false);
csync->exclude_traversal_fn = excludedFiles->csyncTraversalMatchFun();
excludedFiles->addExcludeFilePath(EXCLUDE_LIST_FILE);
@ -429,7 +431,101 @@ static void check_csync_pathes(void **)
assert_int_equal(check_dir_full("/excludepath/withsubdir/foo"), CSYNC_FILE_EXCLUDE_LIST);
}
static void check_csync_is_windows_reserved_word(void **) {
static void check_csync_wildcards(void **)
{
excludedFiles->addManualExclude("a/foo*bar");
excludedFiles->addManualExclude("b/foo*bar*");
excludedFiles->addManualExclude("c/foo?bar");
excludedFiles->addManualExclude("d/foo?bar*");
excludedFiles->addManualExclude("e/foo?bar?");
excludedFiles->addManualExclude("g/bar*");
excludedFiles->addManualExclude("h/bar?");
excludedFiles->setWildcardsMatchSlash(false);
assert_int_equal(check_file_traversal("a/fooXYZbar"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("a/fooX/Zbar"), CSYNC_NOT_EXCLUDED);
assert_int_equal(check_file_traversal("b/fooXYZbarABC"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("b/fooX/ZbarABC"), CSYNC_NOT_EXCLUDED);
assert_int_equal(check_file_traversal("c/fooXbar"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("c/foo/bar"), CSYNC_NOT_EXCLUDED);
assert_int_equal(check_file_traversal("d/fooXbarABC"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("d/foo/barABC"), CSYNC_NOT_EXCLUDED);
assert_int_equal(check_file_traversal("e/fooXbarA"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("e/foo/barA"), CSYNC_NOT_EXCLUDED);
assert_int_equal(check_file_traversal("g/barABC"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("g/XbarABC"), CSYNC_NOT_EXCLUDED);
assert_int_equal(check_file_traversal("h/barZ"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("h/XbarZ"), CSYNC_NOT_EXCLUDED);
excludedFiles->setWildcardsMatchSlash(true);
assert_int_equal(check_file_traversal("a/fooX/Zbar"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("b/fooX/ZbarABC"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("c/foo/bar"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("d/foo/barABC"), CSYNC_FILE_EXCLUDE_LIST);
assert_int_equal(check_file_traversal("e/foo/barA"), CSYNC_FILE_EXCLUDE_LIST);
}
static void check_csync_regex_translation(void **)
{
QByteArray storage;
auto translate = [&storage](const char *pattern) {
storage = convertToRegexpSyntax(pattern, false).toUtf8();
return storage.constData();
};
assert_string_equal(translate(""), "");
assert_string_equal(translate("abc"), "abc");
assert_string_equal(translate("a*c"), "a[^/]*c");
assert_string_equal(translate("a?c"), "a[^/]c");
assert_string_equal(translate("a[xyz]c"), "a[xyz]c");
assert_string_equal(translate("a[xyzc"), "a\\[xyzc");
assert_string_equal(translate("a[!xyz]c"), "a[^xyz]c");
assert_string_equal(translate("a\\*b\\?c\\[d\\\\e"), "a\\*b\\?c\\[d\\\\e");
assert_string_equal(translate("a.c"), "a\\.c");
assert_string_equal(translate("?𠜎?"), "[^/]\\𠜎[^/]"); // 𠜎 is 4-byte utf8
}
static void check_csync_bname_trigger(void **)
{
bool wildcardsMatchSlash = false;
QByteArray storage;
auto translate = [&storage, &wildcardsMatchSlash](const char *pattern) {
storage = extractBnameTrigger(pattern, wildcardsMatchSlash).toUtf8();
return storage.constData();
};
assert_string_equal(translate(""), "");
assert_string_equal(translate("a/b/"), "");
assert_string_equal(translate("a/b/c"), "c");
assert_string_equal(translate("c"), "c");
assert_string_equal(translate("a/foo*"), "foo*");
assert_string_equal(translate("a/abc*foo*"), "abc*foo*");
wildcardsMatchSlash = true;
assert_string_equal(translate(""), "");
assert_string_equal(translate("a/b/"), "");
assert_string_equal(translate("a/b/c"), "c");
assert_string_equal(translate("c"), "c");
assert_string_equal(translate("*"), "*");
assert_string_equal(translate("a/foo*"), "foo*");
assert_string_equal(translate("a/abc?foo*"), "*foo*");
assert_string_equal(translate("a/abc*foo*"), "*foo*");
assert_string_equal(translate("a/abc?foo?"), "*foo?");
assert_string_equal(translate("a/abc*foo?*"), "*foo?*");
assert_string_equal(translate("a/abc*/foo*"), "foo*");
}
static void check_csync_is_windows_reserved_word(void **)
{
assert_true(csync_is_windows_reserved_word("CON"));
assert_true(csync_is_windows_reserved_word("con"));
assert_true(csync_is_windows_reserved_word("CON."));
@ -537,6 +633,9 @@ int torture_run_tests(void)
cmocka_unit_test_setup_teardown(T::check_csync_excluded_traversal, T::setup_init, T::teardown),
cmocka_unit_test_setup_teardown(T::check_csync_dir_only, T::setup, T::teardown),
cmocka_unit_test_setup_teardown(T::check_csync_pathes, T::setup_init, T::teardown),
cmocka_unit_test_setup_teardown(T::check_csync_wildcards, T::setup, T::teardown),
cmocka_unit_test_setup_teardown(T::check_csync_regex_translation, T::setup, T::teardown),
cmocka_unit_test_setup_teardown(T::check_csync_bname_trigger, T::setup, T::teardown),
cmocka_unit_test_setup_teardown(T::check_csync_is_windows_reserved_word, T::setup_init, T::teardown),
cmocka_unit_test_setup_teardown(T::check_csync_excluded_performance, T::setup_init, T::teardown),
cmocka_unit_test(T::check_csync_exclude_expand_escapes),