mirror of
https://github.com/qbittorrent/qBittorrent.git
synced 2025-01-10 09:07:25 +03:00
ff9a281b72
Change project directory structure according to application structure. Change 'nox' configuration option to something more meaningful 'nogui'. Rename 'Icons' folder to 'icons' (similar to other folders). Partially add 'nowebui' option support. Remove QConf project file.
509 lines
16 KiB
C++
509 lines
16 KiB
C++
/*
|
|
* Bittorrent Client using Qt4 and libtorrent.
|
|
* Copyright (C) 2012 Christophe Dumez
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* In addition, as a special exception, the copyright holders give permission to
|
|
* link this program with the OpenSSL project's "OpenSSL" library (or with
|
|
* modified versions of it that use the same license as the "OpenSSL" library),
|
|
* and distribute the linked executables. You must obey the GNU General Public
|
|
* License in all respects for all of the code used other than "OpenSSL". If you
|
|
* modify file(s), you may extend this exception to your version of the file(s),
|
|
* but you are not obligated to do so. If you do not wish to do so, delete this
|
|
* exception statement from your version.
|
|
*
|
|
* Contact : chris@qbittorrent.org
|
|
*/
|
|
|
|
#include "rssparser.h"
|
|
#include "downloadthread.h"
|
|
#include "fs_utils.h"
|
|
#include <QDebug>
|
|
#include <QFile>
|
|
#include <QRegExp>
|
|
#include <QStringList>
|
|
#include <QVariant>
|
|
#include <QTextDocument>
|
|
|
|
struct ParsingJob {
|
|
QString feedUrl;
|
|
QString filePath;
|
|
};
|
|
|
|
// English day and month names used when parsing RFC 2822 style dates.
// Each row is a NUL-terminated C string; the inner dimension is the longest
// name plus the terminator. Weeks start on Monday, months on January.
static const char shortDay[7][4] = {
  "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
};

static const char longDay[7][10] = {
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
};

static const char shortMonth[12][4] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

static const char longMonth[12][10] = {
  "January", "February", "March", "April",
  "May", "June", "July", "August",
  "September", "October", "November", "December"
};
|
|
|
|
// Ported to Qt4 from KDElibs4
|
|
QDateTime RssParser::parseDate(const QString &string) {
|
|
const QString str = string.trimmed();
|
|
if (str.isEmpty())
|
|
return QDateTime::currentDateTime();
|
|
|
|
int nyear = 6; // indexes within string to values
|
|
int nmonth = 4;
|
|
int nday = 2;
|
|
int nwday = 1;
|
|
int nhour = 7;
|
|
int nmin = 8;
|
|
int nsec = 9;
|
|
// Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm"
|
|
QRegExp rx("^(?:([A-Z][a-z]+),\\s*)?(\\d{1,2})(\\s+|-)([^-\\s]+)(\\s+|-)(\\d{2,4})\\s+(\\d\\d):(\\d\\d)(?::(\\d\\d))?\\s+(\\S+)$");
|
|
QStringList parts;
|
|
if (!str.indexOf(rx)) {
|
|
// Check that if date has '-' separators, both separators are '-'.
|
|
parts = rx.capturedTexts();
|
|
bool h1 = (parts[3] == QLatin1String("-"));
|
|
bool h2 = (parts[5] == QLatin1String("-"));
|
|
if (h1 != h2)
|
|
return QDateTime::currentDateTime();
|
|
} else {
|
|
// Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY"
|
|
rx = QRegExp("^([A-Z][a-z]+)\\s+(\\S+)\\s+(\\d\\d)\\s+(\\d\\d):(\\d\\d):(\\d\\d)\\s+(\\d\\d\\d\\d)$");
|
|
if (str.indexOf(rx))
|
|
return QDateTime::currentDateTime();
|
|
nyear = 7;
|
|
nmonth = 2;
|
|
nday = 3;
|
|
nwday = 1;
|
|
nhour = 4;
|
|
nmin = 5;
|
|
nsec = 6;
|
|
parts = rx.capturedTexts();
|
|
}
|
|
bool ok[4];
|
|
const int day = parts[nday].toInt(&ok[0]);
|
|
int year = parts[nyear].toInt(&ok[1]);
|
|
const int hour = parts[nhour].toInt(&ok[2]);
|
|
const int minute = parts[nmin].toInt(&ok[3]);
|
|
if (!ok[0] || !ok[1] || !ok[2] || !ok[3])
|
|
return QDateTime::currentDateTime();
|
|
int second = 0;
|
|
if (!parts[nsec].isEmpty()) {
|
|
second = parts[nsec].toInt(&ok[0]);
|
|
if (!ok[0])
|
|
return QDateTime::currentDateTime();
|
|
}
|
|
bool leapSecond = (second == 60);
|
|
if (leapSecond)
|
|
second = 59; // apparently a leap second - validate below, once time zone is known
|
|
int month = 0;
|
|
for ( ; month < 12 && parts[nmonth] != shortMonth[month]; ++month) ;
|
|
int dayOfWeek = -1;
|
|
if (!parts[nwday].isEmpty()) {
|
|
// Look up the weekday name
|
|
while (++dayOfWeek < 7 && shortDay[dayOfWeek] != parts[nwday]) ;
|
|
if (dayOfWeek >= 7)
|
|
for (dayOfWeek = 0; dayOfWeek < 7 && longDay[dayOfWeek] != parts[nwday]; ++dayOfWeek) ;
|
|
}
|
|
// if (month >= 12 || dayOfWeek >= 7
|
|
// || (dayOfWeek < 0 && format == RFCDateDay))
|
|
// return QDateTime;
|
|
int i = parts[nyear].size();
|
|
if (i < 4) {
|
|
// It's an obsolete year specification with less than 4 digits
|
|
year += (i == 2 && year < 50) ? 2000: 1900;
|
|
}
|
|
|
|
// Parse the UTC offset part
|
|
int offset = 0; // set default to '-0000'
|
|
bool negOffset = false;
|
|
if (parts.count() > 10) {
|
|
rx = QRegExp("^([+-])(\\d\\d)(\\d\\d)$");
|
|
if (!parts[10].indexOf(rx)) {
|
|
// It's a UTC offset ±hhmm
|
|
parts = rx.capturedTexts();
|
|
offset = parts[2].toInt(&ok[0]) * 3600;
|
|
int offsetMin = parts[3].toInt(&ok[1]);
|
|
if (!ok[0] || !ok[1] || offsetMin > 59)
|
|
return QDateTime();
|
|
offset += offsetMin * 60;
|
|
negOffset = (parts[1] == QLatin1String("-"));
|
|
if (negOffset)
|
|
offset = -offset;
|
|
} else {
|
|
// Check for an obsolete time zone name
|
|
QByteArray zone = parts[10].toLatin1();
|
|
if (zone.length() == 1 && isalpha(zone[0]) && toupper(zone[0]) != 'J')
|
|
negOffset = true; // military zone: RFC 2822 treats as '-0000'
|
|
else if (zone != "UT" && zone != "GMT") { // treated as '+0000'
|
|
offset = (zone == "EDT") ? -4*3600
|
|
: (zone == "EST" || zone == "CDT") ? -5*3600
|
|
: (zone == "CST" || zone == "MDT") ? -6*3600
|
|
: (zone == "MST" || zone == "PDT") ? -7*3600
|
|
: (zone == "PST") ? -8*3600
|
|
: 0;
|
|
if (!offset) {
|
|
// Check for any other alphabetic time zone
|
|
bool nonalpha = false;
|
|
for (int i = 0, end = zone.size(); i < end && !nonalpha; ++i)
|
|
nonalpha = !isalpha(zone[i]);
|
|
if (nonalpha)
|
|
return QDateTime();
|
|
// TODO: Attempt to recognize the time zone abbreviation?
|
|
negOffset = true; // unknown time zone: RFC 2822 treats as '-0000'
|
|
}
|
|
}
|
|
}
|
|
}
|
|
QDate qdate(year, month+1, day); // convert date, and check for out-of-range
|
|
if (!qdate.isValid())
|
|
return QDateTime::currentDateTime();
|
|
QTime qTime(hour, minute, second);
|
|
|
|
QDateTime result(qdate, qTime, Qt::UTC);
|
|
if (offset)
|
|
result = result.addSecs(-offset);
|
|
if (!result.isValid())
|
|
return QDateTime::currentDateTime(); // invalid date/time
|
|
|
|
if (leapSecond) {
|
|
// Validate a leap second time. Leap seconds are inserted after 23:59:59 UTC.
|
|
// Convert the time to UTC and check that it is 00:00:00.
|
|
if ((hour*3600 + minute*60 + 60 - offset + 86400*5) % 86400) // (max abs(offset) is 100 hours)
|
|
return QDateTime::currentDateTime(); // the time isn't the last second of the day
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// Builds the parser and launches its worker thread right away; the thread
// executes run() for as long as m_running stays true.
RssParser::RssParser(QObject *parent)
  : QThread(parent)
  , m_running(true)
{
  this->start();
}
|
|
|
|
RssParser::~RssParser()
|
|
{
|
|
m_running = false;
|
|
m_waitCondition.wakeOne();
|
|
wait();
|
|
}
|
|
|
|
void RssParser::parseRssFile(const QString& feedUrl, const QString& filePath)
|
|
{
|
|
qDebug() << Q_FUNC_INFO << feedUrl << filePath;
|
|
m_mutex.lock();
|
|
ParsingJob job = { feedUrl, fsutils::fromNativePath(filePath) };
|
|
m_queue.enqueue(job);
|
|
// Wake up thread.
|
|
if (m_queue.count() == 1) {
|
|
qDebug() << Q_FUNC_INFO << "Waking up thread";
|
|
m_waitCondition.wakeOne();
|
|
}
|
|
m_mutex.unlock();
|
|
}
|
|
|
|
void RssParser::clearFeedData(const QString &feedUrl)
|
|
{
|
|
m_mutex.lock();
|
|
m_lastBuildDates.remove(feedUrl);
|
|
m_mutex.unlock();
|
|
}
|
|
|
|
void RssParser::run()
|
|
{
|
|
while (m_running) {
|
|
m_mutex.lock();
|
|
if (!m_queue.empty()) {
|
|
ParsingJob job = m_queue.dequeue();
|
|
m_mutex.unlock();
|
|
parseFeed(job);
|
|
} else {
|
|
qDebug() << Q_FUNC_INFO << "Thread is waiting.";
|
|
m_waitCondition.wait(&m_mutex);
|
|
qDebug() << Q_FUNC_INFO << "Thread woke up.";
|
|
m_mutex.unlock();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Reads one RSS <item> from xml, up to its closing tag or the end of the
// input, and emits newArticle(feedUrl, article) with the collected fields.
// If the item carries no <guid>, its link and then its title are used as the
// article id; an item with none of the three is dropped with a warning.
void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
{
  // Child elements whose text is stored verbatim: { element name, article key }
  static const char* const textFields[][2] = {
    { "title",       "title" },
    { "link",        "news_link" },
    { "description", "description" },
    { "author",      "author" },
    { "guid",        "id" }
  };
  static const int textFieldCount = sizeof(textFields) / sizeof(textFields[0]);

  QVariantHash article;

  while (!xml.atEnd()) {
    xml.readNext();

    if (xml.isEndElement() && xml.name() == "item")
      break;
    if (!xml.isStartElement())
      continue;

    bool stored = false;
    for (int idx = 0; idx < textFieldCount && !stored; ++idx) {
      if (xml.name() == textFields[idx][0]) {
        article[textFields[idx][1]] = xml.readElementText();
        stored = true;
      }
    }
    if (stored)
      continue;

    if (xml.name() == "enclosure") {
      // Only torrent enclosures are of interest
      if (xml.attributes().value("type") == "application/x-bittorrent")
        article["torrent_url"] = xml.attributes().value("url").toString();
    }
    else if (xml.name() == "pubDate") {
      article["date"] = parseDate(xml.readElementText());
    }
  }

  if (!article.contains("id")) {
    // Item does not have a guid, fall back to some other identifier
    QString fallbackId = article.value("news_link").toString();
    if (fallbackId.isEmpty())
      fallbackId = article.value("title").toString();
    if (fallbackId.isEmpty()) {
      qWarning() << "Item has no guid, link or title, ignoring it...";
      return;
    }
    article["id"] = fallbackId;
  }

  emit newArticle(feedUrl, article);
}
|
|
|
|
// Walks an RSS <channel>: emits feedTitle() for its <title>, hands every
// <item> to parseRssArticle(), and stops early when <lastBuildDate> equals
// the value remembered for feedUrl from a previous run.
// xml must be positioned on the <channel> start element.
void RssParser::parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl)
{
  qDebug() << Q_FUNC_INFO << feedUrl;
  Q_ASSERT(xml.isStartElement() && xml.name() == "channel");

  while (!xml.atEnd()) {
    xml.readNext();
    if (!xml.isStartElement())
      continue;

    if (xml.name() == "title") {
      const QString channelTitle = xml.readElementText();
      emit feedTitle(feedUrl, channelTitle);
      continue;
    }

    if (xml.name() == "lastBuildDate") {
      const QString buildDate = xml.readElementText();
      if (buildDate.isEmpty())
        continue;
      // m_lastBuildDates is also modified by clearFeedData(), hence the lock;
      // it is released at the end of this branch.
      QMutexLocker locker(&m_mutex);
      if (m_lastBuildDates.value(feedUrl, "") == buildDate) {
        qDebug() << "The RSS feed has not changed since last time, aborting parsing.";
        return;
      }
      m_lastBuildDates[feedUrl] = buildDate;
      continue;
    }

    if (xml.name() == "item")
      parseRssArticle(xml, feedUrl);
  }
}
|
|
|
|
// Reads one Atom <entry> from xml, up to its closing tag or the end of the
// input, and emits newArticle(feedUrl, article) with the collected fields.
// baseUrl is the feed's xml:base value (may be empty); it is prepended to
// the entry link.
// If the entry carries no <id>, its link and then its title are used as the
// article id; an entry with none of the three is dropped with a warning.
void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl, const QString& baseUrl)
{
  QVariantHash article;
  bool double_content = false;

  while (!xml.atEnd()) {
    xml.readNext();

    if (xml.isEndElement() && xml.name() == "entry")
      break;
    if (!xml.isStartElement())
      continue;

    if (xml.name() == "title") {
      // Workaround for CDATA (QString cannot parse html escapes on its own)
      QTextDocument doc;
      doc.setHtml(xml.readElementText());
      article["title"] = doc.toPlainText();
    }
    else if (xml.name() == "link") {
      const QString theLink = ( xml.attributes().isEmpty() ?
                                  xml.readElementText() :
                                  xml.attributes().value("href").toString() );

      // Atom feeds can have relative links, work around this and
      // take the stress of figuring article full URI from UI

      // Assemble full URI
      // NOTE(review): this is plain concatenation, so an absolute href
      // combined with a non-empty xml:base yields a malformed link.
      article["news_link"] = ( baseUrl.isEmpty() ?
                                 theLink :
                                 baseUrl + theLink );
    }
    else if (xml.name() == "summary" || xml.name() == "content") {
      if (double_content) {
        // Duplicate content -> ignore. skipCurrentElement() stops at the
        // matching end tag or on a reader error, so a truncated feed cannot
        // make this spin forever.
        xml.skipCurrentElement();
        continue;
      }

      // Try to also parse broken articles, which don't use html '&' escapes
      // Actually works great for non-broken content too
      const QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements);
      if (!feedText.isEmpty())
        article["description"] = feedText;

      double_content = true;
    }
    else if (xml.name() == "updated") {
      // ATOM uses standard compliant date, don't do fancy stuff
      const QDateTime articleDate = QDateTime::fromString(xml.readElementText(), Qt::ISODate);
      article["date"] = ( articleDate.isValid() ?
                            articleDate :
                            QDateTime::currentDateTime() );
    }
    else if (xml.name() == "author") {
      // Visit the children of <author>. readNextStartElement() returns false
      // at </author> or on a reader error, so the loop always terminates.
      while (xml.readNextStartElement()) {
        if (xml.name() == "name")
          article["author"] = xml.readElementText();
        else
          xml.skipCurrentElement();
      }
    }
    else if (xml.name() == "id") {
      article["id"] = xml.readElementText();
    }
  }

  if (!article.contains("id")) {
    // Item does not have a guid, fall back to some other identifier
    QString fallbackId = article.value("news_link").toString();
    if (fallbackId.isEmpty())
      fallbackId = article.value("title").toString();
    if (fallbackId.isEmpty()) {
      qWarning() << "Item has no guid, link or title, ignoring it...";
      return;
    }
    article["id"] = fallbackId;
  }

  emit newArticle(feedUrl, article);
}
|
|
|
|
// Walks an Atom <feed>: emits feedTitle() for its <title>, hands every
// <entry> to parseAtomArticle() together with the feed's xml:base value, and
// stops early when <updated> equals the value remembered for feedUrl from a
// previous run. xml must be positioned on the <feed> start element.
void RssParser::parseAtomChannel(QXmlStreamReader& xml, const QString& feedUrl)
{
  qDebug() << Q_FUNC_INFO << feedUrl;
  Q_ASSERT(xml.isStartElement() && xml.name() == "feed");

  const QString baseURL = xml.attributes().value("xml:base").toString();

  while (!xml.atEnd()) {
    xml.readNext();
    if (!xml.isStartElement())
      continue;

    if (xml.name() == "title") {
      const QString channelTitle = xml.readElementText();
      emit feedTitle(feedUrl, channelTitle);
      continue;
    }

    if (xml.name() == "updated") {
      const QString buildDate = xml.readElementText();
      if (buildDate.isEmpty())
        continue;
      // m_lastBuildDates is also modified by clearFeedData(), hence the lock;
      // it is released at the end of this branch.
      QMutexLocker locker(&m_mutex);
      if (m_lastBuildDates.value(feedUrl) == buildDate) {
        qDebug() << "The RSS feed has not changed since last time, aborting parsing.";
        return;
      }
      m_lastBuildDates[feedUrl] = buildDate;
      continue;
    }

    if (xml.name() == "entry")
      parseAtomArticle(xml, feedUrl, baseURL);
  }
}
|
|
|
|
// read and create items from a rss document
|
|
void RssParser::parseFeed(const ParsingJob& job)
|
|
{
|
|
qDebug() << Q_FUNC_INFO << job.feedUrl << job.filePath;
|
|
QFile fileRss(job.filePath);
|
|
if (!fileRss.open(QIODevice::ReadOnly | QIODevice::Text)) {
|
|
reportFailure(job, tr("Failed to open downloaded RSS file."));
|
|
return;
|
|
}
|
|
QXmlStreamReader xml(&fileRss);
|
|
|
|
bool found_channel = false;
|
|
while (xml.readNextStartElement()) {
|
|
if (xml.name() == "rss") {
|
|
// Find channels
|
|
while (xml.readNextStartElement()) {
|
|
if (xml.name() == "channel") {
|
|
parseRSSChannel(xml, job.feedUrl);
|
|
found_channel = true;
|
|
break;
|
|
} else {
|
|
qDebug() << "Skip rss item: " << xml.name();
|
|
xml.skipCurrentElement();
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
else if (xml.name() == "feed") { // Atom feed
|
|
parseAtomChannel(xml, job.feedUrl);
|
|
found_channel = true;
|
|
break;
|
|
} else {
|
|
qDebug() << "Skip root item: " << xml.name();
|
|
xml.skipCurrentElement();
|
|
}
|
|
}
|
|
|
|
if (xml.hasError()) {
|
|
reportFailure(job, xml.errorString());
|
|
return;
|
|
}
|
|
|
|
if (!found_channel) {
|
|
reportFailure(job, tr("Invalid RSS feed at %1.").arg(job.feedUrl));
|
|
return;
|
|
}
|
|
|
|
// Clean up
|
|
fileRss.close();
|
|
emit feedParsingFinished(job.feedUrl, QString());
|
|
fsutils::forceRemove(job.filePath);
|
|
}
|
|
|
|
void RssParser::reportFailure(const ParsingJob& job, const QString& error)
|
|
{
|
|
emit feedParsingFinished(job.feedUrl, error);
|
|
fsutils::forceRemove(job.filePath);
|
|
}
|