mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-03-14 20:21:14 +03:00
[GolemBridge] Add golem.de bridge (#2696)
This commit is contained in:
parent
5d77d14f9d
commit
56e991122b
1 changed files with 125 additions and 0 deletions
125
bridges/GolemBridge.php
Normal file
125
bridges/GolemBridge.php
Normal file
|
@ -0,0 +1,125 @@
|
|||
<?php
|
||||
|
||||
/**
 * Bridge for golem.de that expands every feed entry into the full article text.
 *
 * The upstream Atom feeds only carry an intro; for each feed item this bridge
 * downloads the article page (and any follow-up pages announced through
 * <link rel="next">) and concatenates the extracted HTML into the item content.
 */
class GolemBridge extends FeedExpander {
	const MAINTAINER = 'Mynacol';
	const NAME = 'Golem Bridge';
	const URI = 'https://www.golem.de/';
	const CACHE_TIMEOUT = 1800; // 30min
	const DESCRIPTION = 'Returns the full articles instead of only the intro';
	const PARAMETERS = array(array(
		'category' => array(
			'name' => 'Category',
			'type' => 'list',
			'values' => array(
				'Alle News'
					=> 'https://rss.golem.de/rss.php?feed=ATOM1.0',
				'Audio/Video'
					=> 'https://rss.golem.de/rss.php?ms=audio-video&feed=ATOM1.0',
				'Auto'
					=> 'https://rss.golem.de/rss.php?ms=auto&feed=ATOM1.0',
				'Foto'
					=> 'https://rss.golem.de/rss.php?ms=foto&feed=ATOM1.0',
				'Games'
					=> 'https://rss.golem.de/rss.php?ms=games&feed=ATOM1.0',
				'Handy'
					=> 'https://rss.golem.de/rss.php?ms=handy&feed=ATOM1.0',
				'Internet'
					=> 'https://rss.golem.de/rss.php?ms=internet&feed=ATOM1.0',
				'Mobil'
					=> 'https://rss.golem.de/rss.php?ms=mobil&feed=ATOM1.0',
				'Open Source'
					=> 'https://rss.golem.de/rss.php?ms=open-source&feed=ATOM1.0',
				'Politik/Recht'
					=> 'https://rss.golem.de/rss.php?ms=politik-recht&feed=ATOM1.0',
				'Security'
					=> 'https://rss.golem.de/rss.php?ms=security&feed=ATOM1.0',
				'Desktop-Applikationen'
					=> 'https://rss.golem.de/rss.php?ms=desktop-applikationen&feed=ATOM1.0',
				'Software-Entwicklung'
					=> 'https://rss.golem.de/rss.php?ms=softwareentwicklung&feed=ATOM1.0',
				'Wirtschaft'
					=> 'https://rss.golem.de/rss.php?ms=wirtschaft&feed=ATOM1.0',
				'Wissenschaft'
					=> 'https://rss.golem.de/rss.php?ms=wissenschaft&feed=ATOM1.0'
			)
		),
		'limit' => array(
			'name' => 'Limit',
			'type' => 'number',
			'required' => false,
			'title' => 'Specify number of full articles to return',
			'defaultValue' => 5
		)
	));

	// Fallback used when the 'limit' input is empty or zero.
	const LIMIT = 5;

	// Extra request headers sent with every article page download.
	// NOTE(review): presumably this cookie pre-answers the site's consent dialog - confirm.
	const HEADERS = array('Cookie: golem_consent20=simple|220101;');

	/**
	 * Loads the feed selected through the 'category' input.
	 *
	 * The second argument is the 'limit' input, or LIMIT when that input is
	 * empty/zero (presumably it caps the number of expanded items - see
	 * FeedExpander::collectExpandableDatas).
	 */
	public function collectData() {
		$this->collectExpandableDatas(
			$this->getInput('category'),
			$this->getInput('limit') ?: static::LIMIT
		);
	}

	/**
	 * Expands one feed entry with the content of its article page(s).
	 *
	 * @param mixed $item Feed entry as handed over by FeedExpander.
	 * @return array The parsed item; 'content' has the extracted HTML of every
	 *               fetched page appended, 'uri' and 'author' are overwritten
	 *               when the page provides them.
	 */
	protected function parseItem($item) {
		$item = parent::parseItem($item);
		$item['content'] = $item['content'] ?? '';
		$uri = $item['uri'] ?? null;

		// Pages already fetched for this item. The loop is driven by remote
		// markup, so a self-referencing or cyclic rel="next" chain must not be
		// able to keep it running forever.
		$visited = array();

		while ($uri && !isset($visited[$uri])) {
			$visited[$uri] = true;

			$articlePage = getSimpleHTMLDOMCached($uri, static::CACHE_TIMEOUT, static::HEADERS);
			if (!$articlePage) {
				// Nothing parseable came back; keep what has been collected so far.
				break;
			}

			// URI without RSS feed reference. Only replace the feed URI when the
			// page really provides one, instead of clobbering it with null.
			$cleanUri = $articlePage->find('head meta[name="twitter:url"]', 0);
			if ($cleanUri && $cleanUri->content) {
				$item['uri'] = $cleanUri->content;
			}

			$author = $articlePage->find('article header .authors .authors__name', 0);
			if ($author) {
				$item['author'] = $author->innertext;
			}

			$item['content'] .= $this->extractContent($articlePage);

			// next page
			$nextLink = $articlePage->find('link[rel="next"]', 0);
			$nextHref = $nextLink ? trim((string)$nextLink->href) : '';

			if ($nextHref === '') {
				// No (usable) next link: an empty href would otherwise resolve
				// to the site root and pull in the homepage.
				$uri = null;
			} elseif (preg_match('#^https?://#i', $nextHref)) {
				// Already absolute, use as is.
				$uri = $nextHref;
			} else {
				// URI ends with '/', so drop a leading slash to avoid '//'.
				$uri = static::URI . ltrim($nextHref, '/');
			}
		}

		return $item;
	}

	/**
	 * Builds the article HTML for a single page.
	 *
	 * Removes known unwanted elements from the <article>, then concatenates the
	 * paged-cluster heading (if any), the header's paragraphs/figures and the
	 * paragraphs, headings and images of the 'div.formatted' body.
	 *
	 * @param object $page Parsed article page (simple_html_dom document).
	 * @return string Extracted HTML; empty string when the page has no <article>.
	 */
	private function extractContent($page) {
		$item = '';

		$article = $page->find('article', 0);
		if (!$article) {
			return $item;
		}

		// delete known bad elements
		$badSelector = implode(', ', array(
			'div[id*="adtile"]',
			'#job-market',
			'#seminars',
			'div.gbox_affiliate',
			'div.toc',
			'.embedcontent'
		));
		foreach($article->find($badSelector) as $bad) {
			$bad->remove();
		}

		// reload html, as remove() is buggy
		$article = str_get_html($article->outertext);
		if (!$article) {
			// str_get_html() yields no document for empty or unparsable markup.
			return $item;
		}

		// Heading of the individual page of a multi-page article.
		$pageHeader = $article->find('header.paged-cluster-header h1', 0);
		if ($pageHeader) {
			$item .= $pageHeader;
		}

		$header = $article->find('header', 0);
		if ($header) {
			foreach($header->find('p, figure') as $element) {
				$item .= $element;
			}
		}

		$content = $article->find('div.formatted', 0);
		if ($content) {
			// fix image galleries (empty src attribute), additionally full image quality
			foreach($content->find('img[data-src-full]') as $img) {
				$img->src = $img->getAttribute('data-src-full');
			}

			foreach($content->find('p, h1, h3, img') as $element) {
				$item .= $element;
			}
		}

		return $item;
	}
}
|
Loading…
Add table
Reference in a new issue