mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-25 10:56:18 +03:00
8865521b3b
Do not add all images of the image gallery, but only the preselected one. Often, the same gallery is used multiple times with different preselected images. The previous implementation always added all images of the gallery, cluttering the article. This patch only adds the preselected one. The no-JS link wrapped around the gallery leads to a 403 Forbidden page, so linking to it is not a workable way to properly support galleries.
125 lines
3.6 KiB
PHP
125 lines
3.6 KiB
PHP
<?php
|
|
|
|
/**
 * Bridge for golem.de that expands the site's Atom feeds into full articles.
 *
 * For every feed entry the linked article page (and every following page
 * announced through a `link[rel="next"]` element) is downloaded and its
 * text and images are appended to the item content.
 */
class GolemBridge extends FeedExpander {
    const MAINTAINER = 'Mynacol';
    const NAME = 'Golem Bridge';
    const URI = 'https://www.golem.de/';
    const CACHE_TIMEOUT = 1800; // 30min
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const PARAMETERS = array(array(
        'category' => array(
            'name' => 'Category',
            'type' => 'list',
            'values' => array(
                'Alle News' => 'https://rss.golem.de/rss.php?feed=ATOM1.0',
                'Audio/Video' => 'https://rss.golem.de/rss.php?ms=audio-video&feed=ATOM1.0',
                'Auto' => 'https://rss.golem.de/rss.php?ms=auto&feed=ATOM1.0',
                'Foto' => 'https://rss.golem.de/rss.php?ms=foto&feed=ATOM1.0',
                'Games' => 'https://rss.golem.de/rss.php?ms=games&feed=ATOM1.0',
                'Handy' => 'https://rss.golem.de/rss.php?ms=handy&feed=ATOM1.0',
                'Internet' => 'https://rss.golem.de/rss.php?ms=internet&feed=ATOM1.0',
                'Mobil' => 'https://rss.golem.de/rss.php?ms=mobil&feed=ATOM1.0',
                'Open Source' => 'https://rss.golem.de/rss.php?ms=open-source&feed=ATOM1.0',
                'Politik/Recht' => 'https://rss.golem.de/rss.php?ms=politik-recht&feed=ATOM1.0',
                'Security' => 'https://rss.golem.de/rss.php?ms=security&feed=ATOM1.0',
                'Desktop-Applikationen' => 'https://rss.golem.de/rss.php?ms=desktop-applikationen&feed=ATOM1.0',
                'Software-Entwicklung' => 'https://rss.golem.de/rss.php?ms=softwareentwicklung&feed=ATOM1.0',
                'Wirtschaft' => 'https://rss.golem.de/rss.php?ms=wirtschaft&feed=ATOM1.0',
                'Wissenschaft' => 'https://rss.golem.de/rss.php?ms=wissenschaft&feed=ATOM1.0'
            )
        ),
        'limit' => array(
            'name' => 'Limit',
            'type' => 'number',
            'required' => false,
            'title' => 'Specify number of full articles to return',
            'defaultValue' => 5
        )
    ));

    // Fallback number of articles when the 'limit' input is empty or 0.
    const LIMIT = 5;

    // Extra HTTP headers sent with every article page request.
    // NOTE(review): the cookie presumably pre-answers the site's consent
    // dialog so the real article is served - confirm against golem.de.
    const HEADERS = array('Cookie: golem_consent20=simple|220101;');

    /**
     * Loads the selected category feed and expands up to 'limit' entries.
     */
    public function collectData() {
        $this->collectExpandableDatas(
            $this->getInput('category'),
            $this->getInput('limit') ?: static::LIMIT
        );
    }

    /**
     * Turns one feed entry into a full article item.
     *
     * Starting at the entry's URI, each article page is fetched, its content
     * appended to $item['content'], and the `link[rel="next"]` element is
     * followed until no further page is announced.
     *
     * @param mixed $item Raw feed entry as handed over by FeedExpander.
     * @return array The parsed item with 'uri', 'author' and 'content' filled
     *               in as far as the article pages provide them.
     */
    protected function parseItem($item) {
        $item = parent::parseItem($item);
        $item['content'] = $item['content'] ?? '';
        $uri = $item['uri'] ?? null;

        // Pages already fetched; protects against "next" links that point
        // back to an earlier page and would otherwise loop forever.
        $seen = array();

        while ($uri && !isset($seen[$uri])) {
            $seen[$uri] = true;

            $articlePage = getSimpleHTMLDOMCached($uri, static::CACHE_TIMEOUT, static::HEADERS);
            if (!$articlePage) {
                // Nothing parseable came back; keep what was collected so far.
                break;
            }

            // URI without RSS feed reference. Only overwrite the feed URI
            // when the page actually carries the meta tag.
            $canonical = $articlePage->find('head meta[name="twitter:url"]', 0);
            if ($canonical && $canonical->content) {
                $item['uri'] = $canonical->content;
            }

            $author = $articlePage->find('article header .authors .authors__name', 0);
            if ($author) {
                $item['author'] = $author->innertext;
            }

            $item['content'] .= $this->extractContent($articlePage);

            // next page
            $nextLink = $articlePage->find('link[rel="next"]', 0);
            $uri = $nextLink ? $this->resolveUri($nextLink->href) : null;
        }

        return $item;
    }

    /**
     * Resolves a link target found on an article page against the site URI.
     *
     * Absolute and protocol-relative targets are kept; everything else is
     * appended to static::URI with exactly one slash in between.
     *
     * @param mixed $href Value of the href attribute (may be empty).
     * @return string|null Absolute URI, or null when $href is empty.
     */
    private function resolveUri($href) {
        $href = trim((string)$href);
        if ($href === '') {
            return null;
        }

        if (preg_match('#^https?://#i', $href)) {
            return $href;
        }

        if (substr($href, 0, 2) === '//') {
            return 'https:' . $href;
        }

        return rtrim(static::URI, '/') . '/' . ltrim($href, '/');
    }

    /**
     * Extracts the readable part of a single article page as an HTML string.
     *
     * The result consists of the optional page heading, the paragraphs and
     * figures of the article header, and the paragraphs, headings and images
     * of the `div.formatted` body. Missing parts are skipped, so an
     * unexpected page layout yields less content instead of a fatal error.
     *
     * @param object $page Parsed article page (simple_html_dom document).
     * @return string Concatenated HTML of the extracted elements.
     */
    private function extractContent($page) {
        $item = '';

        $article = $page->find('article', 0);
        if (!$article) {
            return $item;
        }

        // delete known bad elements
        $badSelector = implode(', ', array(
            'div[id*="adtile"]',
            '#job-market',
            '#seminars',
            'div.gbox_affiliate',
            'div.toc',
            '.embedcontent'
        ));
        foreach ($article->find($badSelector) as $bad) {
            $bad->remove();
        }

        // reload html, as remove() is buggy
        $article = str_get_html($article->outertext);
        if (!$article) {
            return $item;
        }

        // Heading shown on the pages of a multi-page article.
        $pageHeader = $article->find('header.paged-cluster-header h1', 0);
        if ($pageHeader) {
            $item .= $pageHeader;
        }

        $header = $article->find('header', 0);
        if ($header) {
            foreach ($header->find('p, figure') as $element) {
                $item .= $element;
            }
        }

        $content = $article->find('div.formatted', 0);
        if (!$content) {
            return $item;
        }

        // full image quality
        foreach ($content->find('img[data-src-full][src*="."]') as $img) {
            $img->src = $img->getAttribute('data-src-full');
        }

        foreach ($content->find('p, h1, h3, img[src*="."]') as $element) {
            $item .= $element;
        }

        return $item;
    }
}
|