mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 01:25:28 +03:00
refactor: FeedExpander::parseItem() descendants (#3744)
This commit is contained in:
parent
9bda9e246a
commit
382648fc22
46 changed files with 314 additions and 281 deletions
|
@ -20,17 +20,16 @@ class AcrimedBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(
|
||||
static::URI . 'spip.php?page=backend',
|
||||
$this->getInput('limit')
|
||||
);
|
||||
$url = 'https://www.acrimed.org/spip.php?page=backend';
|
||||
$limit = $this->getInput('limit');
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
$article = sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||
$article = defaultLinkTo($article, static::URI);
|
||||
$item['content'] = $article;
|
||||
|
|
|
@ -33,9 +33,9 @@ class ArsTechnicaBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($newItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri'] . '&');
|
||||
$item_html = defaultLinkTo($item_html, self::URI);
|
||||
|
|
|
@ -7,6 +7,12 @@ class BleepingComputerBridge extends FeedExpander
|
|||
const URI = 'https://www.bleepingcomputer.com/';
|
||||
const DESCRIPTION = 'Returns the newest articles.';
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
@ -23,10 +29,4 @@ class BleepingComputerBridge extends FeedExpander
|
|||
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,9 +43,9 @@ class CNETFranceBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
foreach ($this->bannedTitle as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['title']) === 1) {
|
||||
|
@ -54,8 +54,7 @@ class CNETFranceBridge extends FeedExpander
|
|||
}
|
||||
|
||||
foreach ($this->bannedURL as $term) {
|
||||
$preg_match = preg_match('#' . $term . '#mi', $item['uri']);
|
||||
if ($preg_match === 1) {
|
||||
if (preg_match('#' . $term . '#mi', $item['uri'])) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,9 +34,9 @@ class CaschyBridge extends FeedExpander
|
|||
);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) {
|
||||
return $item;
|
||||
|
|
|
@ -12,9 +12,9 @@ class CommonDreamsBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -13,11 +13,11 @@ class CourrierInternationalBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $articlePage->find('.article-text, depeche-text', 0);
|
||||
if (!$content) {
|
||||
return $item;
|
||||
|
|
|
@ -56,9 +56,10 @@ class DarkReadingBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feed_url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$item['enclosures'] = []; //remove author profile picture
|
||||
|
|
|
@ -43,9 +43,9 @@ class DauphineLibereBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -163,19 +163,6 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
]
|
||||
];
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
|
||||
{
|
||||
$domain = $this->getInput('domain');
|
||||
if (!empty($domain)) {
|
||||
return 'https://' . $domain . self::DOMAIN . self::RSS_URL;
|
||||
}
|
||||
|
||||
return self::URI . self::RSS_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Grabs the RSS item from Developpez.com
|
||||
*/
|
||||
|
@ -189,15 +176,14 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
* Parse the content of every RSS item. And will try to get the full article
|
||||
* pointed by the item URL intead of the default abstract.
|
||||
*/
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if (count($this->items) >= $this->getInput('limit')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// This function parse each entry in the RSS with the default parse
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
// There is a bug in Developpez RSS, coma are writtent as '~?' in the
|
||||
// title, so I have to fix it manually
|
||||
$item['title'] = $this->fixComaInTitle($item['title']);
|
||||
|
@ -229,6 +215,19 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
return $item;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
|
||||
{
|
||||
$domain = $this->getInput('domain');
|
||||
if (!empty($domain)) {
|
||||
return 'https://' . $domain . self::DOMAIN . self::RSS_URL;
|
||||
}
|
||||
|
||||
return self::URI . self::RSS_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace '~?' by a proper coma ','
|
||||
*/
|
||||
|
@ -334,6 +333,9 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
*/
|
||||
private function isHtmlTagNotTxt($txt)
|
||||
{
|
||||
if ($txt === '') {
|
||||
return false;
|
||||
}
|
||||
$html = str_get_html($txt);
|
||||
return $html && $html->root && count($html->root->children) > 0;
|
||||
}
|
||||
|
|
|
@ -93,21 +93,22 @@ class EconomistBridge extends FeedExpander
|
|||
$limit = 30;
|
||||
}
|
||||
|
||||
$this->collectExpandableDatas('https://www.economist.com/' . $category . '/rss.xml', $limit);
|
||||
$url = 'https://www.economist.com/' . $category . '/rss.xml';
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$html = getSimpleHTMLDOM($item['uri']);
|
||||
$item = parent::parseItem($item);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
$article = $html->find('#new-article-template', 0);
|
||||
$article = $dom->find('#new-article-template', 0);
|
||||
if ($article == null) {
|
||||
$article = $html->find('main', 0);
|
||||
$article = $dom->find('main', 0);
|
||||
}
|
||||
if ($article) {
|
||||
$elem = $article->find('div', 0);
|
||||
list($content, $audio_url) = $this->processContent($html, $elem);
|
||||
list($content, $audio_url) = $this->processContent($dom, $elem);
|
||||
$item['content'] = $content;
|
||||
if ($audio_url != null) {
|
||||
$item['enclosures'] = [$audio_url];
|
||||
|
|
|
@ -10,26 +10,28 @@ class EngadgetBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'https://www.engadget.com/rss.xml';
|
||||
$max = 10;
|
||||
$this->collectExpandableDatas(static::URI . 'rss.xml', $max);
|
||||
$this->collectExpandableDatas($url, $max);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$url = (string) $newsItem->link;
|
||||
if (!$url) {
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$itemUrl = trim($item['uri']);
|
||||
if (!$itemUrl) {
|
||||
return $item;
|
||||
}
|
||||
// todo: remove querystring tracking
|
||||
$articlePage = getSimpleHTMLDOM($url);
|
||||
$dom = getSimpleHTMLDOM($itemUrl);
|
||||
// figure contain's the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
$article = $dom->find('figure', 0);
|
||||
// .article-text has the actual article
|
||||
foreach ($articlePage->find('.article-text') as $element) {
|
||||
foreach ($dom->find('.article-text') as $element) {
|
||||
$article = $article . $element;
|
||||
}
|
||||
$item['content'] = $article;
|
||||
$item['content'] = $article ?? '';
|
||||
return $item;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Appears to be protected by cloudflare now
|
||||
*/
|
||||
class EsquerdaNetBridge extends FeedExpander
|
||||
{
|
||||
const MAINTAINER = 'somini';
|
||||
|
@ -23,32 +26,16 @@ class EsquerdaNetBridge extends FeedExpander
|
|||
]
|
||||
];
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
$type = $this->getInput('feed');
|
||||
return self::URI . '/rss/' . $type;
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://www.esquerda.net/sites/default/files/favicon_0.ico';
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
parent::collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
# Fix Publish date
|
||||
$badDate = $newsItem->pubDate;
|
||||
preg_match('|(?P<day>\d\d)/(?P<month>\d\d)/(?P<year>\d\d\d\d) - (?P<hour>\d\d):(?P<minute>\d\d)|', $badDate, $d);
|
||||
$newsItem->pubDate = sprintf('%s-%s-%sT%s:%s', $d['year'], $d['month'], $d['day'], $d['hour'], $d['minute']);
|
||||
$item = parent::parseItem($newsItem);
|
||||
# Include all the content
|
||||
$uri = $item['uri'];
|
||||
$html = getSimpleHTMLDOMCached($uri);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $html->find('div#content div.content', 0);
|
||||
## Fix author
|
||||
$authorHTML = $html->find('.field-name-field-op-author a', 0);
|
||||
|
@ -72,4 +59,15 @@ class EsquerdaNetBridge extends FeedExpander
|
|||
$item['content'] = $content;
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
$type = $this->getInput('feed');
|
||||
return self::URI . '/rss/' . $type;
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://www.esquerda.net/sites/default/files/favicon_0.ico';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,9 +43,4 @@ class FeedExpanderExampleBridge extends FeedExpander
|
|||
returnClientError('Unknown version ' . $this->getInput('version') . '!');
|
||||
}
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
{
|
||||
return (array) $newsItem;
|
||||
}
|
||||
}
|
||||
|
|
23
bridges/FeedExpanderTestBridge.php
Normal file
23
bridges/FeedExpanderTestBridge.php
Normal file
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
class FeedExpanderTestBridge extends FeedExpander
|
||||
{
|
||||
const MAINTAINER = 'No maintainer';
|
||||
const NAME = 'Unnamed bridge';
|
||||
const URI = 'https://esdf.com/';
|
||||
const DESCRIPTION = 'No description provided';
|
||||
const PARAMETERS = [];
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'http://static.userland.com/gems/backend/sampleRss.xml'; // rss 0.91
|
||||
//$url = 'http://feeds.nature.com/nature/rss/current?format=xml'; // rss 1.0
|
||||
//$url = 'https://dvikan.no/feed.xml'; // rss 2.0
|
||||
//$url = 'https://nedlasting.geonorge.no/geonorge/Tjenestefeed.xml'; // atom
|
||||
|
||||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
}
|
|
@ -82,9 +82,9 @@ class FilterBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem($newItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
// Generate title from first 50 characters of content?
|
||||
if ($this->getInput('title_from_content') && array_key_exists('content', $item)) {
|
||||
|
|
|
@ -12,12 +12,12 @@ class ForGifsBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://forgifs.com/gallery/srss/7');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$content = str_get_html($item['content']);
|
||||
$img = $content->find('img', 0);
|
||||
$dom = str_get_html($item['content']);
|
||||
$img = $dom->find('img', 0);
|
||||
$poster = $img->src;
|
||||
|
||||
// The actual gif is the same path but its id must be decremented by one.
|
||||
|
@ -34,7 +34,7 @@ class ForGifsBridge extends FeedExpander
|
|||
$img->width = 'auto';
|
||||
$img->height = 'auto';
|
||||
|
||||
$item['content'] = $content;
|
||||
$item['content'] = (string) $dom;
|
||||
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -14,15 +14,17 @@ class FreeCodeCampBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.freecodecamp.org/news/rss/', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// figure contain's the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
$article = $dom->find('figure', 0);
|
||||
|
||||
// the actual article
|
||||
foreach ($articlePage->find('.post-full-content') as $element) {
|
||||
foreach ($dom->find('.post-full-content') as $element) {
|
||||
$article = $article . $element;
|
||||
}
|
||||
$item['content'] = $article;
|
||||
|
|
|
@ -85,13 +85,14 @@ class FuturaSciencesBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']);
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$author = $this->extractAuthor($article);
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($dom);
|
||||
$author = $this->extractAuthor($dom);
|
||||
if (!empty($author)) {
|
||||
$item['author'] = $author;
|
||||
}
|
||||
|
|
|
@ -9,15 +9,15 @@ class HardwareInfoBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 20);
|
||||
$this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
//get full article
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
|
||||
$itemUrl = $item['uri'];
|
||||
$articlePage = getSimpleHTMLDOMCached($itemUrl);
|
||||
|
||||
$article = $articlePage->find('div.article__content', 0);
|
||||
|
||||
|
|
|
@ -125,9 +125,10 @@ class HeiseBridge extends FeedExpander
|
|||
);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$sessioncookie = $this->getInput('sessioncookie');
|
||||
|
||||
// strip rss parameter
|
||||
|
|
|
@ -10,17 +10,16 @@ class IGNBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('http://feeds.ign.com/ign/all', 15);
|
||||
$this->collectExpandableDatas('http://feeds.ign.com/ign/all', 2);
|
||||
}
|
||||
|
||||
// IGNs feed is both hidden and incomplete. This bridge tries to fix this.
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// List of BS elements
|
||||
$uselessElements = [
|
||||
|
@ -33,7 +32,7 @@ class IGNBridge extends FeedExpander
|
|||
'.jsx-4213937408',
|
||||
'.commerce-container',
|
||||
'.widget-container',
|
||||
'.newsletter-signup-button'
|
||||
'.newsletter-signup-button',
|
||||
];
|
||||
|
||||
// Remove useless elements
|
||||
|
|
|
@ -12,9 +12,10 @@ class LeMondeInformatiqueBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(self::URI . 'rss/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
//Deduce thumbnail URL from article image URL
|
||||
|
|
|
@ -13,12 +13,11 @@ class ListverseBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://listverse.com/feed/', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$article = $articlePage->find('#articlecontentonly', 0);
|
||||
$item = parent::parseItem($item);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
$article = $dom->find('#articlecontentonly', 0);
|
||||
$item['content'] = $article;
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -29,9 +29,11 @@ class MediapartBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$itemUrl = $item['uri'];
|
||||
|
||||
// Mediapart provide multiple type of contents.
|
||||
// We only process items relative to the newspaper
|
||||
|
@ -49,12 +51,8 @@ class MediapartBridge extends FeedExpander
|
|||
$opt = [];
|
||||
$opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid;
|
||||
|
||||
// Get the page
|
||||
$articlePage = getSimpleHTMLDOM(
|
||||
$newsItem->link . '?onglet=full',
|
||||
[],
|
||||
$opt
|
||||
);
|
||||
$pageUrl = $itemUrl . '?onglet=full';
|
||||
$articlePage = getSimpleHTMLDOM($pageUrl, [], $opt);
|
||||
|
||||
// Extract the article content
|
||||
$content = $articlePage->find('div.content-article', 0)->innertext;
|
||||
|
|
|
@ -22,17 +22,19 @@ class MsnMondeBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(self::FEED_URL, self::LIMIT);
|
||||
$this->collectExpandableDatas(self::FEED_URL, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if (!preg_match('#fr-fr/actualite.*/ar-(?<id>[\w]*)\?#', $item['uri'], $matches)) {
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
$json = json_decode(getContents(self::JSON_URL . $matches['id']), true);
|
||||
$jsonString = getContents(self::JSON_URL . $matches['id']);
|
||||
$json = json_decode($jsonString, true);
|
||||
$item['content'] = $json['body'];
|
||||
if (!empty($json['authors'])) {
|
||||
$item['author'] = reset($json['authors'])['name'];
|
||||
|
|
|
@ -10,17 +10,18 @@ class NYTBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml', 40);
|
||||
$url = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml';
|
||||
$this->collectExpandableDatas($url, 40);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article = '';
|
||||
|
||||
// $articlePage gets the entire page's contents
|
||||
try {
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
} catch (HttpException $e) {
|
||||
// 403 Forbidden, This means we got anti-bot response
|
||||
if ($e->getCode() === 403) {
|
||||
|
|
|
@ -88,9 +88,10 @@ class NextInpactBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['content'] = $this->extractContent($item, $item['uri']);
|
||||
if (is_null($item['content'])) {
|
||||
return null; //Filtered article
|
||||
|
|
|
@ -26,7 +26,8 @@ class NextgovBridge extends FeedExpander
|
|||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(self::URI . 'rss/' . $this->getInput('category') . '/', 10);
|
||||
$url = self::URI . 'rss/' . $this->getInput('category') . '/';
|
||||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
|
|
|
@ -12,9 +12,10 @@ class NiceMatinBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(self::URI . 'derniere-minute/rss', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -65,12 +65,14 @@ class NyaaTorrentsBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($this->getURI(), 20);
|
||||
}
|
||||
|
||||
protected function parseItem($newItem)
|
||||
protected function parseItem($newsItem)
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
$nyaaFields = (array)($newsItem->children('nyaa', true));
|
||||
|
||||
$item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']);
|
||||
|
||||
$nyaaFields = (array)($newItem->children('nyaa', true));
|
||||
$item = array_merge($item, $nyaaFields);
|
||||
|
||||
// Convert URI from torrent file to web page
|
||||
|
|
|
@ -117,18 +117,18 @@ class OnVaSortirBridge extends FeedExpander
|
|||
]
|
||||
];
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'https://' . $this->getInput('city') . '.onvasortir.com/rss.php';
|
||||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$text = $html->find('div.corpsMax', 0)->innertext;
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
$text = $dom->find('div.corpsMax', 0)->innertext;
|
||||
$item['content'] = utf8_encode($text);
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('https://' .
|
||||
$this->getInput('city') . '.onvasortir.com/rss.php');
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,22 +29,25 @@ but some RSS readers don\'t support this. "img" tag are supported by most browse
|
|||
$this->collectExpandableDatas('https://www.phoronix.com/rss.php', $this->getInput('n'));
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$itemUrl = $item['uri'];
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($itemUrl);
|
||||
$articlePage = defaultLinkTo($articlePage, $this->getURI());
|
||||
// Extract final link. From Facebook's like plugin.
|
||||
parse_str(parse_url($articlePage->find('iframe[src^=//www.facebook.com/plugins]', 0), PHP_URL_QUERY), $facebookQuery);
|
||||
$parsedUrlQuery = parse_url($articlePage->find('iframe[src^=//www.facebook.com/plugins]', 0), PHP_URL_QUERY);
|
||||
parse_str($parsedUrlQuery, $facebookQuery);
|
||||
if (array_key_exists('href', $facebookQuery)) {
|
||||
$newsItem->link = $facebookQuery['href'];
|
||||
$itemUrl = $facebookQuery['href'];
|
||||
}
|
||||
$item['content'] = $this->extractContent($articlePage);
|
||||
|
||||
$pages = $articlePage->find('.pagination a[!title]');
|
||||
foreach ($pages as $page) {
|
||||
$pageURI = urljoin($newsItem->link, html_entity_decode($page->href));
|
||||
$pageURI = urljoin($itemUrl, html_entity_decode($page->href));
|
||||
$page = getSimpleHTMLDOM($pageURI);
|
||||
$item['content'] .= $this->extractContent($page);
|
||||
}
|
||||
|
|
|
@ -6,9 +6,15 @@ class QwantzBridge extends FeedExpander
|
|||
const URI = 'https://qwantz.com/';
|
||||
const DESCRIPTION = 'Latest comic.';
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
public function collectData()
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$this->collectExpandableDatas(self::URI . 'rssfeed.php');
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['author'] = 'Ryan North';
|
||||
|
||||
preg_match('/title="(.*?)"/', $item['content'], $matches);
|
||||
|
@ -25,11 +31,6 @@ class QwantzBridge extends FeedExpander
|
|||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(self::URI . 'rssfeed.php');
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return self::URI . 'favicon.ico';
|
||||
|
|
|
@ -12,12 +12,11 @@ class RaceDepartmentBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.racedepartment.com/ams/index.rss', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
//fetch page
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
$coverImage = $articlePage->find('img.js-articleCoverImage', 0);
|
||||
#relative url -> absolute url
|
||||
|
|
|
@ -42,9 +42,9 @@ class ScribbleHubBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($newItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
//For series, filter out other series from 'All' feed
|
||||
if (
|
||||
|
@ -57,7 +57,7 @@ class ScribbleHubBridge extends FeedExpander
|
|||
$item['comments'] = $item['uri'] . '#comments';
|
||||
|
||||
try {
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
} catch (HttpException $e) {
|
||||
// 403 Forbidden, This means we got anti-bot response
|
||||
if ($e->getCode() === 403) {
|
||||
|
@ -66,22 +66,22 @@ class ScribbleHubBridge extends FeedExpander
|
|||
throw $e;
|
||||
}
|
||||
|
||||
$item_html = defaultLinkTo($item_html, self::URI);
|
||||
$dom = defaultLinkTo($dom, self::URI);
|
||||
|
||||
//Retrieve full description from page contents
|
||||
$item['content'] = $item_html->find('#chp_raw', 0);
|
||||
$item['content'] = $dom->find('#chp_raw', 0);
|
||||
|
||||
//Retrieve image for thumbnail
|
||||
$item_image = $item_html->find('.s_novel_img > img', 0)->src;
|
||||
$item_image = $dom->find('.s_novel_img > img', 0)->src;
|
||||
$item['enclosures'] = [$item_image];
|
||||
|
||||
//Restore lost categories
|
||||
$item_story = html_entity_decode($item_html->find('.chp_byauthor > a', 0)->innertext);
|
||||
$item_sid = $item_html->find('#mysid', 0)->value;
|
||||
$item_story = html_entity_decode($dom->find('.chp_byauthor > a', 0)->innertext);
|
||||
$item_sid = $dom->find('#mysid', 0)->value;
|
||||
$item['categories'] = [$item_story, $item_sid];
|
||||
|
||||
//Generate UID
|
||||
$item_pid = $item_html->find('#mypostid', 0)->value;
|
||||
$item_pid = $dom->find('#mypostid', 0)->value;
|
||||
$item['uid'] = $item_sid . "/$item_pid";
|
||||
|
||||
return $item;
|
||||
|
|
|
@ -21,6 +21,12 @@ class SplCenterBridge extends FeedExpander
|
|||
|
||||
const CACHE_TIMEOUT = 3600; // 1 hour
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = $this->getURI() . '/rss.xml';
|
||||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
@ -37,11 +43,6 @@ class SplCenterBridge extends FeedExpander
|
|||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas($this->getURI() . '/rss.xml');
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
if (!is_null($this->getInput('content'))) {
|
||||
|
|
|
@ -30,14 +30,17 @@ class TapasBridge extends FeedExpander
|
|||
|
||||
protected $id;
|
||||
|
||||
public function getURI()
|
||||
public function collectData()
|
||||
{
|
||||
if ($this->id) {
|
||||
return self::URI . 'rss/series/' . $this->id;
|
||||
} else {
|
||||
return self::URI . 'series/' . $this->getInput('title') . '/info/';
|
||||
if (preg_match('/^[\d]+$/', $this->getInput('title'))) {
|
||||
$this->id = $this->getInput('title');
|
||||
}
|
||||
return self::URI;
|
||||
if ($this->getInput('force_title') or !$this->id) {
|
||||
$html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not request ' . $this->getURI());
|
||||
$this->id = $html->find('meta[property$=":url"]', 0)->content;
|
||||
$this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id);
|
||||
}
|
||||
$this->collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
|
@ -72,16 +75,13 @@ class TapasBridge extends FeedExpander
|
|||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
public function getURI()
|
||||
{
|
||||
if (preg_match('/^[\d]+$/', $this->getInput('title'))) {
|
||||
$this->id = $this->getInput('title');
|
||||
if ($this->id) {
|
||||
return self::URI . 'rss/series/' . $this->id;
|
||||
} else {
|
||||
return self::URI . 'series/' . $this->getInput('title') . '/info/';
|
||||
}
|
||||
if ($this->getInput('force_title') or !$this->id) {
|
||||
$html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not request ' . $this->getURI());
|
||||
$this->id = $html->find('meta[property$=":url"]', 0)->content;
|
||||
$this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id);
|
||||
}
|
||||
$this->collectExpandableDatas($this->getURI());
|
||||
return self::URI;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,18 +52,15 @@ class TheGuardianBridge extends FeedExpander
|
|||
public function collectData()
|
||||
{
|
||||
$feed = $this->getInput('feed');
|
||||
$feedURL = 'https://feeds.theguardian.com/theguardian/' . $feed;
|
||||
$this->collectExpandableDatas($feedURL, 10);
|
||||
$url = 'https://feeds.theguardian.com/theguardian/' . $feed;
|
||||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
// --- Recovering the article ---
|
||||
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
// figure contain's the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
// content__article-body has the actual article
|
||||
|
|
|
@ -8,18 +8,24 @@ class TwitterEngineeringBridge extends FeedExpander
|
|||
const DESCRIPTION = 'Returns the newest articles.';
|
||||
const CACHE_TIMEOUT = 21600; // 6h
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'https://blog.twitter.com/engineering/en_us/blog.rss';
|
||||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$article_html) {
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$dom) {
|
||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
return $item;
|
||||
}
|
||||
$article_html = defaultLinkTo($article_html, $this->getURI());
|
||||
$dom = defaultLinkTo($dom, $this->getURI());
|
||||
|
||||
$article_body = $article_html->find('div.column.column-6', 0);
|
||||
$article_body = $dom->find('div.column.column-6', 0);
|
||||
|
||||
// Remove elements that are not part of article content
|
||||
$unwanted_selector = 'div.bl02-blog-post-text-masthead, div.tweet-error-text, div.bl13-tweet-template';
|
||||
|
@ -33,8 +39,8 @@ class TwitterEngineeringBridge extends FeedExpander
|
|||
}
|
||||
|
||||
$item['content'] = $article_body;
|
||||
$item['timestamp'] = strtotime($article_html->find('span.b02-blog-post-no-masthead__date', 0)->innertext);
|
||||
$item['categories'] = self::getCategoriesFromTags($article_html);
|
||||
$item['timestamp'] = strtotime($dom->find('span.b02-blog-post-no-masthead__date', 0)->innertext);
|
||||
$item['categories'] = self::getCategoriesFromTags($dom);
|
||||
|
||||
return $item;
|
||||
}
|
||||
|
@ -53,12 +59,6 @@ class TwitterEngineeringBridge extends FeedExpander
|
|||
return $categories;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$feed = static::URI . 'en_us/blog.rss';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
|
||||
public function getName()
|
||||
{
|
||||
// Else the original feed returns "English (US)" as the title
|
||||
|
|
|
@ -13,11 +13,11 @@ class VarietyBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://feeds.feedburner.com/variety/headlines', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// Remove Script tags
|
||||
foreach ($articlePage->find('script') as $script_tag) {
|
||||
|
|
|
@ -32,14 +32,14 @@ class ViceBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feedURL, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
// text and embedded content
|
||||
$article = $articlePage->find('.article__body', 0);
|
||||
$item['content'] = $article;
|
||||
$item['content'] = $article ?? '';
|
||||
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -50,13 +50,16 @@ class WiredBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feed_url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$originalContent = $item['content'];
|
||||
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
|
||||
$headline = strval($newsItem->description);
|
||||
$headline = $originalContent;
|
||||
if (!empty($headline)) {
|
||||
$item['content'] = '<p><b>' . $headline . '</b></p>' . $item['content'];
|
||||
}
|
||||
|
|
|
@ -20,50 +20,56 @@ class WordPressBridge extends FeedExpander
|
|||
],
|
||||
]];
|
||||
|
||||
private function cleanContent($content)
|
||||
public function collectData()
|
||||
{
|
||||
$content = stripWithDelimiters($content, '<script', '</script>');
|
||||
$content = preg_replace('/<div class="wpa".*/', '', $content);
|
||||
$content = preg_replace('/<form.*\/form>/', '', $content);
|
||||
return $content;
|
||||
$limit = $this->getInput('limit') ?? 10;
|
||||
if ($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') {
|
||||
// just in case someone find a way to access local files by playing with the url
|
||||
returnClientError('The url parameter must either refer to http or https protocol.');
|
||||
}
|
||||
try {
|
||||
$this->collectExpandableDatas($this->getURI() . '/feed/atom/', $limit);
|
||||
} catch (Exception $e) {
|
||||
$this->collectExpandableDatas($this->getURI() . '/?feed=atom', $limit);
|
||||
}
|
||||
}
|
||||
|
||||
protected function parseItem($newItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
// Find article body
|
||||
$article = null;
|
||||
switch (true) {
|
||||
case !empty($this->getInput('content-selector')):
|
||||
// custom contect selector (manually specified by user)
|
||||
$article = $article_html->find($this->getInput('content-selector'), 0);
|
||||
$article = $dom->find($this->getInput('content-selector'), 0);
|
||||
break;
|
||||
case !is_null($article_html->find('[itemprop=articleBody]', 0)):
|
||||
case !is_null($dom->find('[itemprop=articleBody]', 0)):
|
||||
// highest priority content div (used for SEO)
|
||||
$article = $article_html->find('[itemprop=articleBody]', 0);
|
||||
$article = $dom->find('[itemprop=articleBody]', 0);
|
||||
break;
|
||||
case !is_null($article_html->find('.article-content', 0)):
|
||||
case !is_null($dom->find('.article-content', 0)):
|
||||
// more precise than article when present
|
||||
$article = $article_html->find('.article-content', 0);
|
||||
$article = $dom->find('.article-content', 0);
|
||||
break;
|
||||
case !is_null($article_html->find('article', 0)):
|
||||
case !is_null($dom->find('article', 0)):
|
||||
// most common content div
|
||||
$article = $article_html->find('article', 0);
|
||||
$article = $dom->find('article', 0);
|
||||
break;
|
||||
case !is_null($article_html->find('.single-content', 0)):
|
||||
case !is_null($dom->find('.single-content', 0)):
|
||||
// another common content div
|
||||
$article = $article_html->find('.single-content', 0);
|
||||
$article = $dom->find('.single-content', 0);
|
||||
break;
|
||||
case !is_null($article_html->find('.post-content', 0)):
|
||||
case !is_null($dom->find('.post-content', 0)):
|
||||
// another common content div
|
||||
$article = $article_html->find('.post-content', 0);
|
||||
$article = $dom->find('.post-content', 0);
|
||||
break;
|
||||
case !is_null($article_html->find('.post', 0)):
|
||||
case !is_null($dom->find('.post', 0)):
|
||||
// for old WordPress themes without HTML5
|
||||
$article = $article_html->find('.post', 0);
|
||||
$article = $dom->find('.post', 0);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -76,7 +82,7 @@ class WordPressBridge extends FeedExpander
|
|||
|
||||
// Find article main image
|
||||
$article = convertLazyLoading($article);
|
||||
$article_image = $article_html->find('img.wp-post-image', 0);
|
||||
$article_image = $dom->find('img.wp-post-image', 0);
|
||||
if (!empty($item['content']) && (!is_object($article_image) || empty($article_image->src))) {
|
||||
$article_image = str_get_html($item['content'])->find('img.wp-post-image', 0);
|
||||
}
|
||||
|
@ -106,6 +112,14 @@ class WordPressBridge extends FeedExpander
|
|||
return $item;
|
||||
}
|
||||
|
||||
private function cleanContent($content)
|
||||
{
|
||||
$content = stripWithDelimiters($content, '<script', '</script>');
|
||||
$content = preg_replace('/<div class="wpa".*/', '', $content);
|
||||
$content = preg_replace('/<form.*\/form>/', '', $content);
|
||||
return $content;
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
$url = $this->getInput('url');
|
||||
|
@ -114,18 +128,4 @@ class WordPressBridge extends FeedExpander
|
|||
}
|
||||
return $url;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$limit = $this->getInput('limit') ?? 10;
|
||||
if ($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') {
|
||||
// just in case someone find a way to access local files by playing with the url
|
||||
returnClientError('The url parameter must either refer to http or https protocol.');
|
||||
}
|
||||
try {
|
||||
$this->collectExpandableDatas($this->getURI() . '/feed/atom/', $limit);
|
||||
} catch (Exception $e) {
|
||||
$this->collectExpandableDatas($this->getURI() . '/?feed=atom', $limit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,9 +30,9 @@ class WorldOfTanksBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(sprintf('https://worldoftanks.eu/%s/rss/news/', $this->getInput('lang')));
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] = $this->loadFullArticle($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -50,19 +50,19 @@ class ZeitBridge extends FeedExpander
|
|||
'defaultValue' => 5
|
||||
]
|
||||
]];
|
||||
const LIMIT = 5;
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(
|
||||
$this->getInput('category'),
|
||||
$this->getInput('limit') ?: static::LIMIT
|
||||
);
|
||||
$url = $this->getInput('category');
|
||||
$limit = $this->getInput('limit') ?: 5;
|
||||
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['enclosures'] = [];
|
||||
|
||||
$headers = [
|
||||
|
|
Loading…
Reference in a new issue