mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-02-02 00:11:00 +03:00
refactor: remove parent calls to parseItem (#3747)
This commit is contained in:
parent
e379019db2
commit
2880524dfc
55 changed files with 96 additions and 293 deletions
|
@ -25,10 +25,8 @@ class AcrimedBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
$article = sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||
$article = defaultLinkTo($article, static::URI);
|
||||
|
|
|
@ -33,10 +33,8 @@ class ArsTechnicaBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri'] . '&');
|
||||
$item_html = defaultLinkTo($item_html, self::URI);
|
||||
$item['content'] = $item_html->find('.amp-wp-article-content', 0);
|
||||
|
|
|
@ -13,10 +13,8 @@ class BleepingComputerBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$article_html) {
|
||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
|
|
|
@ -43,10 +43,8 @@ class CNETFranceBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/');
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
foreach ($this->bannedTitle as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['title']) === 1) {
|
||||
return null;
|
||||
|
|
|
@ -34,10 +34,8 @@ class CaschyBridge extends FeedExpander
|
|||
);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) {
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -12,9 +12,8 @@ class CommonDreamsBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -13,10 +13,8 @@ class CourrierInternationalBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $articlePage->find('.article-text, depeche-text', 0);
|
||||
if (!$content) {
|
||||
|
|
|
@ -56,10 +56,8 @@ class DarkReadingBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feed_url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$item['enclosures'] = []; //remove author profile picture
|
||||
|
|
|
@ -43,9 +43,8 @@ class DauphineLibereBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -71,10 +71,8 @@ class DeutscheWelleBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($this->getInput('feed'));
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$parsedUrl = parse_url($item['uri']);
|
||||
unset($parsedUrl['query']);
|
||||
$url = $this->unparseUrl($parsedUrl);
|
||||
|
|
|
@ -176,10 +176,8 @@ class DeveloppezDotComBridge extends FeedExpander
|
|||
* Parse the content of every RSS item. And will try to get the full article
|
||||
* pointed to by the item URL instead of the default abstract.
|
||||
*/
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if (count($this->items) >= $this->getInput('limit')) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -97,9 +97,8 @@ class EconomistBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
$article = $dom->find('#new-article-template', 0);
|
||||
|
|
|
@ -15,10 +15,8 @@ class EngadgetBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $max);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$itemUrl = trim($item['uri']);
|
||||
if (!$itemUrl) {
|
||||
return $item;
|
||||
|
|
|
@ -31,10 +31,8 @@ class EsquerdaNetBridge extends FeedExpander
|
|||
parent::collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $html->find('div#content div.content', 0);
|
||||
## Fix author
|
||||
|
|
|
@ -14,9 +14,9 @@ class FeedExpanderTestBridge extends FeedExpander
|
|||
public function collectData()
|
||||
{
|
||||
$url = 'http://static.userland.com/gems/backend/sampleRss.xml'; // rss 0.91
|
||||
//$url = 'http://feeds.nature.com/nature/rss/current?format=xml'; // rss 1.0
|
||||
//$url = 'https://dvikan.no/feed.xml'; // rss 2.0
|
||||
//$url = 'https://nedlasting.geonorge.no/geonorge/Tjenestefeed.xml'; // atom
|
||||
$url = 'http://feeds.nature.com/nature/rss/current?format=xml'; // rss 1.0
|
||||
$url = 'https://dvikan.no/feed.xml'; // rss 2.0
|
||||
$url = 'https://nedlasting.geonorge.no/geonorge/Tjenestefeed.xml'; // atom
|
||||
|
||||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
|
|
@ -82,10 +82,8 @@ class FilterBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
// Generate title from first 50 characters of content?
|
||||
if ($this->getInput('title_from_content') && array_key_exists('content', $item)) {
|
||||
$content = str_get_html($item['content']);
|
||||
|
|
|
@ -29,10 +29,8 @@ class FolhaDeSaoPauloBridge extends FeedExpander
|
|||
]
|
||||
];
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if ($this->getInput('deep_crawl')) {
|
||||
$articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
|
||||
if ($articleHTMLContent) {
|
||||
|
|
|
@ -12,10 +12,8 @@ class ForGifsBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://forgifs.com/gallery/srss/7');
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$dom = str_get_html($item['content']);
|
||||
$img = $dom->find('img', 0);
|
||||
$poster = $img->src;
|
||||
|
|
|
@ -14,10 +14,8 @@ class FreeCodeCampBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.freecodecamp.org/news/rss/', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// figure contains the main article image
|
||||
|
|
|
@ -85,10 +85,8 @@ class FuturaSciencesBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']);
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($dom);
|
||||
|
|
|
@ -8,10 +8,8 @@ class GizmodoBridge extends FeedExpander
|
|||
const CACHE_TIMEOUT = 1800; // 30min
|
||||
const DESCRIPTION = 'Returns the newest posts from Gizmodo.';
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
$html = defaultLinkTo($html, $this->getURI());
|
||||
|
|
|
@ -63,9 +63,8 @@ class GolemBridge extends FeedExpander
|
|||
);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] ??= '';
|
||||
$uri = $item['uri'];
|
||||
|
||||
|
|
|
@ -12,10 +12,8 @@ class HardwareInfoBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$itemUrl = $item['uri'];
|
||||
$articlePage = getSimpleHTMLDOMCached($itemUrl);
|
||||
|
||||
|
|
|
@ -125,10 +125,8 @@ class HeiseBridge extends FeedExpander
|
|||
);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$sessioncookie = $this->getInput('sessioncookie');
|
||||
|
||||
// strip rss parameter
|
||||
|
|
|
@ -15,10 +15,8 @@ class IGNBridge extends FeedExpander
|
|||
|
||||
// IGNs feed is both hidden and incomplete. This bridge tries to fix this.
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// List of BS elements
|
||||
|
|
|
@ -7,10 +7,8 @@ class KoreusBridge extends FeedExpander
|
|||
const URI = 'https://www.koreus.com/';
|
||||
const DESCRIPTION = 'Returns the newest posts from Koreus (full text)';
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$text = $html->find('p.itemText', 0)->innertext;
|
||||
$item['content'] = utf8_encode($text);
|
||||
|
|
|
@ -12,10 +12,8 @@ class LeMondeInformatiqueBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(self::URI . 'rss/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
//Deduce thumbnail URL from article image URL
|
||||
|
|
|
@ -13,9 +13,8 @@ class ListverseBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://listverse.com/feed/', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
$article = $dom->find('#articlecontentonly', 0);
|
||||
$item['content'] = $article;
|
||||
|
|
|
@ -29,10 +29,8 @@ class MediapartBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$itemUrl = $item['uri'];
|
||||
|
||||
// Mediapart provides multiple types of content.
|
||||
|
|
|
@ -25,10 +25,8 @@ class MsnMondeBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(self::FEED_URL, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if (!preg_match('#fr-fr/actualite.*/ar-(?<id>[\w]*)\?#', $item['uri'], $matches)) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -14,10 +14,8 @@ class NYTBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 40);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article = '';
|
||||
|
||||
try {
|
||||
|
|
|
@ -88,10 +88,8 @@ class NextInpactBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['content'] = $this->extractContent($item, $item['uri']);
|
||||
if (is_null($item['content'])) {
|
||||
return null; //Filtered article
|
||||
|
|
|
@ -31,10 +31,8 @@ class NextgovBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article_thumbnail = 'https://cdn.nextgov.com/nextgov/images/logo.png';
|
||||
$item['content'] = '<p><b>' . $item['content'] . '</b></p>';
|
||||
|
||||
|
|
|
@ -12,10 +12,8 @@ class NiceMatinBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(self::URI . 'derniere-minute/rss', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -123,9 +123,8 @@ class OnVaSortirBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
$text = $dom->find('div.corpsMax', 0)->innertext;
|
||||
$item['content'] = utf8_encode($text);
|
||||
|
|
|
@ -29,10 +29,8 @@ but some RSS readers don\'t support this. "img" tag are supported by most browse
|
|||
$this->collectExpandableDatas('https://www.phoronix.com/rss.php', $this->getInput('n'));
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$itemUrl = $item['uri'];
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($itemUrl);
|
||||
|
|
|
@ -11,14 +11,12 @@ class QwantzBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(self::URI . 'rssfeed.php');
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['author'] = 'Ryan North';
|
||||
|
||||
preg_match('/title="(.*?)"/', $item['content'], $matches);
|
||||
$title = $matches[1];
|
||||
$title = $matches[1] ?? '';
|
||||
|
||||
$content = str_get_html(html_entity_decode($item['content']));
|
||||
$comicURL = $content->find('img')[0]->{'src'};
|
||||
|
|
|
@ -12,10 +12,8 @@ class RaceDepartmentBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://www.racedepartment.com/ams/index.rss', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
$coverImage = $articlePage->find('img.js-articleCoverImage', 0);
|
||||
|
|
|
@ -42,10 +42,8 @@ class ScribbleHubBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
//For series, filter out other series from 'All' feed
|
||||
if (
|
||||
$this->queriedContext === 'Series'
|
||||
|
|
|
@ -27,10 +27,8 @@ class SplCenterBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articleHtml = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
foreach ($articleHtml->find('.file') as $index => $media) {
|
||||
|
|
|
@ -43,10 +43,8 @@ class TapasBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
// $namespaces = $feedItem->getNamespaces(true);
|
||||
// if (isset($namespaces['content'])) {
|
||||
// $description = $feedItem->children($namespaces['content']);
|
||||
|
|
|
@ -56,10 +56,8 @@ class TheGuardianBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
// figure contains the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
|
|
|
@ -14,10 +14,8 @@ class TwitterEngineeringBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$dom) {
|
||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
|
|
|
@ -13,10 +13,8 @@ class VarietyBridge extends FeedExpander
|
|||
$this->collectExpandableDatas('https://feeds.feedburner.com/variety/headlines', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// Remove Script tags
|
||||
|
|
|
@ -32,10 +32,8 @@ class ViceBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feedURL, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
// text and embedded content
|
||||
$article = $articlePage->find('.article__body', 0);
|
||||
|
|
|
@ -12,10 +12,8 @@ class WeLiveSecurityBridge extends FeedExpander
|
|||
],
|
||||
];
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$html) {
|
||||
$item['content'] .= '<br /><p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
|
|
|
@ -50,10 +50,8 @@ class WiredBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($feed_url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$originalContent = $item['content'];
|
||||
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
|
|
@ -34,10 +34,8 @@ class WordPressBridge extends FeedExpander
|
|||
}
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
// Find article body
|
||||
|
|
|
@ -30,9 +30,8 @@ class WorldOfTanksBridge extends FeedExpander
|
|||
$this->collectExpandableDatas(sprintf('https://worldoftanks.eu/%s/rss/news/', $this->getInput('lang')));
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] = $this->loadFullArticle($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
|
|
@ -174,10 +174,8 @@ class ZDNetBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$article) {
|
||||
$this->logger->info('Unable to parse the dom from ' . $item['uri']);
|
||||
|
|
|
@ -59,10 +59,8 @@ class ZeitBridge extends FeedExpander
|
|||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$item['enclosures'] = [];
|
||||
|
||||
$headers = [
|
||||
|
|
|
@ -1,85 +1,35 @@
|
|||
`FeedExpander` extends [`BridgeAbstract`](./02_BridgeAbstract.md) and adds functions to collect data from existing feeds.
|
||||
|
||||
**Usage example**: _You have discovered a site that provides feeds which are hidden and inaccessible by normal means. You want your bridge to directly read the feeds and provide them via **RSS-Bridge**_
|
||||
|
||||
To create a new Bridge extending `FeedExpander` you must implement all required functions of [`BridgeAbstract`](./02_BridgeAbstract.md). `FeedExpander` additionally provides the following functions:
|
||||
|
||||
* [`parseItem`](#the-parseitem-function)
|
||||
* [`getName`](#the-getname-function)
|
||||
* [`getURI`](#the-geturi-function)
|
||||
* [`getDescription`](#the-getdescription-function)
|
||||
|
||||
Find a [template](#template) at the end of this file.
|
||||
|
||||
**Notice:** For a standard feed only `collectData` needs to be implemented. `collectData` should call `$this->collectExpandableDatas('your URI here');` to automatically load feed items and header data (will subsequently call `parseItem` for each item in the feed). You can limit the number of items to fetch by specifying an additional parameter: `$this->collectExpandableDatas('your URI here', 10)` (limited to 10 items).
|
||||
|
||||
## The `parseItem` function
|
||||
## The `parseItem` method
|
||||
|
||||
This function receives one item from the current feed and should return one **RSS-Bridge** item.
|
||||
This method receives one item from the current feed and should return one **RSS-Bridge** item.
|
||||
The default function does all the work to get the item data from the feed, whether it is RSS 1.0,
|
||||
RSS 2.0 or Atom 1.0. If you have to redefine this function in your **RSS-Bridge** for whatever reason,
|
||||
you should first call the parent function to initialize the item, then apply the changes that you require.
|
||||
RSS 2.0 or Atom 1.0.
|
||||
|
||||
**Notice:** The following code sample is just an example. Implementation depends on your requirements!
|
||||
|
||||
```PHP
|
||||
protected function parseItem($feedItem){
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item['content'] = str_replace('rssbridge','RSS-Bridge',$feedItem->content);
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item['content'] = str_replace('rssbridge','RSS-Bridge',$item['content']);
|
||||
return $item;
|
||||
}
|
||||
```
|
||||
|
||||
### Helper functions
|
||||
### Feed parsing
|
||||
|
||||
The `FeedExpander` already provides a set of functions to parse RSS or Atom items based on the specifications. Where possible make use of these functions:
|
||||
|
||||
Function | Description
|
||||
---------|------------
|
||||
`parseATOMItem` | Parses an Atom 1.0 feed item
|
||||
`parseRSS_0_9_1_Item` | Parses an RSS 0.91 feed item
|
||||
`parseRSS_1_0_Item` | Parses an RSS 1.0 feed item
|
||||
`parseRSS_2_0_Item` | Parses an RSS 2.0 feed item
|
||||
|
||||
In the following list you'll find the feed tags assigned to the **RSS-Bridge** item keys:
|
||||
How rss-bridge processes xml feeds:
|
||||
|
||||
Function | uri | title | timestamp | author | content
|
||||
---------|-----|-------|-----------|--------|--------
|
||||
`parseATOMItem` | id | title | updated | author | content
|
||||
`parseRSS_0_9_1_Item` | link | title | | | description
|
||||
`parseRSS_1_0_Item` | link | title | dc:date | dc:creator | description
|
||||
`parseRSS_2_0_Item` | link, guid | title | pubDate, dc:date | author, dc:creator | description
|
||||
|
||||
## The `getName` function
|
||||
|
||||
Returns the name of the current feed.
|
||||
|
||||
```PHP
|
||||
return $this->name;
|
||||
```
|
||||
|
||||
**Notice:** Only implement this function if you require different behavior!
|
||||
|
||||
## The `getURI` function
|
||||
|
||||
Return the uri for the current feed.
|
||||
|
||||
```PHP
|
||||
return $this->uri;
|
||||
```
|
||||
|
||||
**Notice:** Only implement this function if you require different behavior!
|
||||
|
||||
## The `getDescription` function
|
||||
|
||||
Returns the description for the current bridge.
|
||||
|
||||
```PHP
|
||||
return $this->description;
|
||||
```
|
||||
|
||||
**Notice:** Only implement this function if you require different behavior!
|
||||
`atom` | id | title | updated | author | content
|
||||
`rss 0.91` | link | title | | | description
|
||||
`rss 1.0` | link | title | dc:date | dc:creator | description
|
||||
`rss 2.0` | link, guid | title | pubDate, dc:date | author, dc:creator | description
|
||||
|
||||
# Template
|
||||
|
||||
|
@ -87,19 +37,19 @@ This is the template for a new bridge:
|
|||
|
||||
```PHP
|
||||
<?php
|
||||
class MySiteBridge extends FeedExpander {
|
||||
class MySiteBridge extends FeedExpander
|
||||
{
|
||||
|
||||
const MAINTAINER = 'No maintainer';
|
||||
const NAME = 'Unnamed bridge';
|
||||
const URI = '';
|
||||
const DESCRIPTION = 'No description provided';
|
||||
const PARAMETERS = [];
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
const MAINTAINER = 'No maintainer';
|
||||
const NAME = 'Unnamed bridge';
|
||||
const URI = '';
|
||||
const DESCRIPTION = 'No description provided';
|
||||
const PARAMETERS = [];
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('your feed URI');
|
||||
}
|
||||
}
|
||||
// Imaginary empty line!
|
||||
```
|
|
@ -7,7 +7,7 @@ and extends one of the base classes of **RSS-Bridge**:
|
|||
Base class | Description
|
||||
-----------|------------
|
||||
[`BridgeAbstract`](./02_BridgeAbstract.md) | This class is intended for standard _Bridges_ that need to filter HTML pages for content.
|
||||
[`FeedExpander`](./03_FeedExpander.md) | This class is an extension of `HttpCachingBridgeAbstract`, designed to load existing feeds into **RSS-Bridge**
|
||||
[`FeedExpander`](./03_FeedExpander.md) | Expand/modify existing feed urls
|
||||
[`XPathAbstract`](./04_XPathAbstract.md) | This class is meant as an alternative base class for bridge implementations. It offers preliminary functionality for generating feeds based on _XPath expressions_.
|
||||
|
||||
For more information about how to create a new _Bridge_, read [How to create a new Bridge?](./01_How_to_create_a_new_bridge.md)
|
|
@ -5,111 +5,57 @@
|
|||
*/
|
||||
abstract class FeedExpander extends BridgeAbstract
|
||||
{
|
||||
const FEED_TYPE_RSS_1_0 = 'RSS_1_0';
|
||||
const FEED_TYPE_RSS_2_0 = 'RSS_2_0';
|
||||
const FEED_TYPE_ATOM_1_0 = 'ATOM_1_0';
|
||||
|
||||
private string $feedType;
|
||||
private FeedParser $feedParser;
|
||||
private array $parsedFeed;
|
||||
|
||||
public function __construct(CacheInterface $cache, Logger $logger)
|
||||
{
|
||||
parent::__construct($cache, $logger);
|
||||
$this->feedParser = new FeedParser();
|
||||
}
|
||||
private array $feed;
|
||||
|
||||
public function collectExpandableDatas(string $url, $maxItems = -1)
|
||||
{
|
||||
if (!$url) {
|
||||
throw new \Exception('There is no $url for this RSS expander');
|
||||
}
|
||||
$maxItems = (int) $maxItems;
|
||||
if ($maxItems === -1) {
|
||||
$maxItems = 999;
|
||||
}
|
||||
$accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*'];
|
||||
$httpHeaders = ['Accept: ' . implode(', ', $accept)];
|
||||
// Notice we do not use cache here on purpose. We want a fresh view of the RSS stream each time
|
||||
$xmlString = getContents($url, $httpHeaders);
|
||||
if ($xmlString === '') {
|
||||
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
|
||||
}
|
||||
// Maybe move this call earlier up the stack frames
|
||||
// Disable triggering of the php error-handler and handle errors manually instead
|
||||
libxml_use_internal_errors(true);
|
||||
// Consider replacing libxml with https://www.php.net/domdocument
|
||||
// Intentionally not using the silencing operator (@) because it has no effect here
|
||||
$xml = simplexml_load_string(trim($xmlString));
|
||||
if ($xml === false) {
|
||||
$xmlErrors = libxml_get_errors();
|
||||
foreach ($xmlErrors as $xmlError) {
|
||||
Debug::log(trim($xmlError->message));
|
||||
}
|
||||
if ($xmlErrors) {
|
||||
// Render only the first error into exception message
|
||||
$firstXmlErrorMessage = $xmlErrors[0]->message;
|
||||
}
|
||||
throw new \Exception(sprintf('Unable to parse xml from `%s` %s', $url, $firstXmlErrorMessage ?? ''), 11);
|
||||
}
|
||||
// Restore previous behaviour in case other code relies on it being off
|
||||
libxml_use_internal_errors(false);
|
||||
|
||||
// Currently only feed metadata (not items) are plucked out
|
||||
$this->parsedFeed = $this->feedParser->parseFeed($xmlString);
|
||||
|
||||
if (isset($xml->item[0])) {
|
||||
$this->feedType = self::FEED_TYPE_RSS_1_0;
|
||||
$items = $xml->item;
|
||||
} elseif (isset($xml->channel[0])) {
|
||||
$this->feedType = self::FEED_TYPE_RSS_2_0;
|
||||
$items = $xml->channel[0]->item;
|
||||
} elseif (isset($xml->entry[0])) {
|
||||
$this->feedType = self::FEED_TYPE_ATOM_1_0;
|
||||
$items = $xml->entry;
|
||||
} else {
|
||||
throw new \Exception(sprintf('Unable to detect feed format from `%s`', $url));
|
||||
}
|
||||
$feedParser = new FeedParser();
|
||||
$this->feed = $feedParser->parseFeed($xmlString);
|
||||
$items = array_slice($this->feed['items'], 0, $maxItems);
|
||||
foreach ($items as $item) {
|
||||
$parsedItem = $this->parseItem($item);
|
||||
if ($parsedItem) {
|
||||
$this->items[] = $parsedItem;
|
||||
}
|
||||
if (count($this->items) >= $maxItems) {
|
||||
break;
|
||||
// Give bridges a chance to modify the item
|
||||
$item = $this->parseItem($item);
|
||||
if ($item) {
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \SimpleXMLElement $item The feed item to be parsed
|
||||
* This method is overridden by bridges
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
switch ($this->feedType) {
|
||||
case self::FEED_TYPE_RSS_1_0:
|
||||
return $this->feedParser->parseRss1Item($item);
|
||||
case self::FEED_TYPE_RSS_2_0:
|
||||
return $this->feedParser->parseRss2Item($item);
|
||||
case self::FEED_TYPE_ATOM_1_0:
|
||||
return $this->feedParser->parseAtomItem($item);
|
||||
default:
|
||||
throw new \Exception(sprintf('Unknown version %s!', $this->getInput('version')));
|
||||
}
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
return $this->parsedFeed['uri'] ?? parent::getURI();
|
||||
return $this->feed['uri'] ?? parent::getURI();
|
||||
}
|
||||
|
||||
public function getName()
|
||||
{
|
||||
return $this->parsedFeed['title'] ?? parent::getName();
|
||||
return $this->feed['title'] ?? parent::getName();
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return $this->parsedFeed['icon'] ?? parent::getIcon();
|
||||
return $this->feed['icon'] ?? parent::getIcon();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,10 +14,10 @@ final class FeedParser
|
|||
throw new \Exception('Unable to parse xml');
|
||||
}
|
||||
$feed = [
|
||||
'title' => null,
|
||||
'url' => null,
|
||||
'icon' => null,
|
||||
'items' => [],
|
||||
'title' => null,
|
||||
'uri' => null,
|
||||
'icon' => null,
|
||||
'items' => [],
|
||||
];
|
||||
if (isset($xml->item[0])) {
|
||||
// rss 1.0
|
||||
|
|
Loading…
Add table
Reference in a new issue