Allow "|" as an OR operator in the RSS rule "must contain" field. Closes #6171.

--HG--
branch : magao-dev
This commit is contained in:
Tim Delaney 2016-11-07 17:46:27 +11:00
parent 2244b7cb66
commit b84d9201fd
2 changed files with 83 additions and 22 deletions

View file

@ -50,35 +50,92 @@ DownloadRule::DownloadRule()
bool DownloadRule::matches(const QString &articleTitle) const bool DownloadRule::matches(const QString &articleTitle) const
{ {
QRegExp whitespace("\\s+");
if (!m_mustContain.empty()) { if (!m_mustContain.empty()) {
bool logged = false; bool logged = false;
bool foundMustContain = true;
// Each expression is either a regex, or a set of wildcards separated by whitespace.
// Accept if any complete expression matches.
foreach (const QString &expression, m_mustContain) {
if (expression.isEmpty())
continue;
foreach (const QString &token, m_mustContain) {
if (!token.isEmpty()) {
if (!logged) { if (!logged) {
qDebug() << "Checking matching expressions:" << m_mustContain.join("|"); qDebug() << "Checking matching expressions:" << m_mustContain.join("|");
logged = true; logged = true;
} }
QRegExp reg(token, Qt::CaseInsensitive, m_useRegex ? QRegExp::RegExp : QRegExp::Wildcard); if (m_useRegex) {
if (reg.indexIn(articleTitle) < 0) QRegExp reg(expression, Qt::CaseInsensitive, QRegExp::RegExp);
if (reg.indexIn(articleTitle) > -1)
foundMustContain = true;
}
else {
// Only accept if every wildcard token (separated by spaces) is present in the article name.
// Order of wildcard tokens is unimportant (if order is important, they should have used *).
foundMustContain = true;
foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) {
QRegExp reg(wildcard, Qt::CaseInsensitive, QRegExp::Wildcard);
if (reg.indexIn(articleTitle) == -1) {
foundMustContain = false;
break;
}
}
}
if (foundMustContain) {
qDebug() << "Found matching expression:" << expression;
break;
}
}
if (!foundMustContain)
return false; return false;
} }
}
}
if (!m_mustNotContain.empty()) { if (!m_mustNotContain.empty()) {
bool logged = false; bool logged = false;
foreach (const QString &token, m_mustNotContain) { // Each expression is either a regex, or a set of wildcards separated by whitespace.
if (!token.isEmpty()) { // Reject if any complete expression matches.
foreach (const QString &expression, m_mustNotContain) {
if (expression.isEmpty())
continue;
if (!logged) { if (!logged) {
qDebug() << "Checking not matching expressions:" << m_mustNotContain.join("|"); qDebug() << "Checking not matching expressions:" << m_mustNotContain.join("|");
logged = true; logged = true;
} }
QRegExp reg(token, Qt::CaseInsensitive, m_useRegex ? QRegExp::RegExp : QRegExp::Wildcard); if (m_useRegex) {
if (reg.indexIn(articleTitle) > -1) QRegExp reg(expression, Qt::CaseInsensitive, QRegExp::RegExp);
if (reg.indexIn(articleTitle) > -1) {
qDebug() << "Found not matching expression:" << expression;
return false;
}
}
// Only reject if every wildcard token (separated by spaces) is present in the article name.
// Order of wildcard tokens is unimportant (if order is important, they should have used *).
bool foundMustNotContain = true;
foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) {
QRegExp reg(wildcard, Qt::CaseInsensitive, QRegExp::Wildcard);
if (reg.indexIn(articleTitle) == -1) {
foundMustNotContain = false;
break;
}
}
if (foundMustNotContain) {
qDebug()<< "Found not matching expression:" << expression;
return false; return false;
} }
} }
@ -162,7 +219,7 @@ void DownloadRule::setMustContain(const QString &tokens)
if (m_useRegex) if (m_useRegex)
m_mustContain = QStringList() << tokens; m_mustContain = QStringList() << tokens;
else else
m_mustContain = tokens.split(" "); m_mustContain = tokens.split("|");
} }
void DownloadRule::setMustNotContain(const QString &tokens) void DownloadRule::setMustNotContain(const QString &tokens)
@ -220,7 +277,7 @@ QVariantHash DownloadRule::toVariantHash() const
{ {
QVariantHash hash; QVariantHash hash;
hash["name"] = m_name; hash["name"] = m_name;
hash["must_contain"] = m_mustContain.join(" "); hash["must_contain"] = m_mustContain.join("|");
hash["must_not_contain"] = m_mustNotContain.join("|"); hash["must_not_contain"] = m_mustNotContain.join("|");
hash["save_path"] = m_savePath; hash["save_path"] = m_savePath;
hash["affected_feeds"] = m_rssFeeds; hash["affected_feeds"] = m_rssFeeds;
@ -296,7 +353,7 @@ int DownloadRule::ignoreDays() const
QString DownloadRule::mustContain() const QString DownloadRule::mustContain() const
{ {
return m_mustContain.join(" "); return m_mustContain.join("|");
} }
QString DownloadRule::mustNotContain() const QString DownloadRule::mustNotContain() const

View file

@ -578,14 +578,18 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex)
{ {
QString tip; QString tip;
if (regex) { if (regex) {
tip = tr("Regex mode: use Perl-like regular expressions"); tip = "<p>" + tr("Regex mode: use Perl-like regular expressions") + "</p>";
ui->lineContains->setToolTip(tip); ui->lineContains->setToolTip(tip);
ui->lineNotContains->setToolTip(tip); ui->lineNotContains->setToolTip(tip);
} }
else { else {
tip = tr("Wildcard mode: you can use<ul><li>? to match any single character</li><li>* to match zero or more of any characters</li><li>Whitespaces count as AND operators</li></ul>"); tip = "<p>" + tr("Wildcard mode: you can use") + "<ul>"
+ "<li>" + tr("? to match any single character") + "</li>"
+ "<li>" + tr("* to match zero or more of any characters") + "</li>"
+ "<li>" + tr("Whitespaces count as AND operators (all words, any order)") + "</li>"
+ "<li>" + tr("| is used as OR operator") + "</li></ul></p>"
+ "<p>" + tr("If word order is important use * instead of whitespace.") + "</p>";
ui->lineContains->setToolTip(tip); ui->lineContains->setToolTip(tip);
tip = tr("Wildcard mode: you can use<ul><li>? to match any single character</li><li>* to match zero or more of any characters</li><li>| is used as OR operator</li></ul>");
ui->lineNotContains->setToolTip(tip); ui->lineNotContains->setToolTip(tip);
} }
} }