mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-26 19:36:52 +03:00
8d8fe66aab
* [HeiseBridge] Parser rewrite This rewrite is more readable and consistent than the previous one. Additionally, this removes unwanted elements, largely recommendations for other articles. Furthermore, it increases the image quality by using the original picture link instead of the compressed ones. * [HeiseBridge] Formatting
105 lines
3.5 KiB
PHP
105 lines
3.5 KiB
PHP
<?php
|
|
|
|
/**
 * Bridge for heise online: expands the entries of the selected heise feed
 * with the content of the linked article page instead of only the intro.
 */
class HeiseBridge extends FeedExpander
{
    const MAINTAINER = 'Dreckiger-Dan';
    const NAME = 'Heise Online Bridge';
    const URI = 'https://heise.de/';
    const CACHE_TIMEOUT = 1800; // 30min
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const PARAMETERS = [[
        'category' => [
            'name' => 'Category',
            'type' => 'list',
            'values' => [
                'Alle News'
                => 'https://www.heise.de/newsticker/heise-atom.xml',
                'Top-News'
                => 'https://www.heise.de/newsticker/heise-top-atom.xml',
                'Internet-Störungen'
                => 'https://www.heise.de/netze/netzwerk-tools/imonitor-internet-stoerungen/feed/aktuelle-meldungen/',
                'Alle News von heise Developer'
                => 'https://www.heise.de/developer/rss/news-atom.xml'
            ]
        ],
        'limit' => [
            'name' => 'Limit',
            'type' => 'number',
            'required' => false,
            'title' => 'Specify number of full articles to return',
            'defaultValue' => 5
        ]
    ]];

    /** Fallback number of articles when no (or an empty/zero) limit is supplied. */
    const LIMIT = 5;

    /**
     * Collects the items of the selected feed.
     *
     * The 'category' input holds the feed URL (see PARAMETERS); an empty or
     * zero 'limit' input falls back to self::LIMIT.
     */
    public function collectData()
    {
        $this->collectExpandableDatas(
            $this->getInput('category'),
            $this->getInput('limit') ?: static::LIMIT
        );
    }

    /**
     * Parses one feed entry and, for articles hosted on www.heise.de,
     * enriches it with data scraped from the article page.
     *
     * @param mixed $feedItem Feed entry, handed unchanged to the parent parser.
     * @return array The item; entries from other hosts are returned without
     *               fetching the page.
     */
    protected function parseItem($feedItem)
    {
        $item = parent::parseItem($feedItem);

        // Replace any query string with "?seite=all"
        // (presumably requests the single-page view of multi-page articles).
        $item['uri'] = explode('?', $item['uri'])[0] . '?seite=all';

        if (strpos($item['uri'], 'https://www.heise.de') !== 0) {
            return $item;
        }

        $article = getSimpleHTMLDOMCached($item['uri']);

        if ($article) {
            $article = defaultLinkTo($article, $item['uri']);
            $item = $this->addArticleToItem($item, $article);
        }

        return $item;
    }

    /**
     * Fills content, author and enclosures of an item from the article DOM.
     *
     * Every lookup is guarded: when an expected element is missing the
     * corresponding part is skipped, and when no content could be extracted
     * at all the content already present on the item is kept.
     *
     * @param array $item    Item as produced by parseItem(); 'uri' must be set.
     * @param mixed $article Parsed article page (simple_html_dom document).
     * @return array The enriched item.
     */
    private function addArticleToItem($item, $article)
    {
        // copy full-res img src to standard img element
        foreach ($article->find('a-img') as $aimg) {
            $img = $aimg->find('img', 0);
            if (!$img) {
                // a-img without a nested img: nothing to rewrite
                continue;
            }
            if ($aimg->src) {
                // only overwrite when the wrapper actually carries a source
                $img->src = $aimg->src;
            }
            // client scales based on aspect ratio in style attribute
            $img->width = '';
            $img->height = '';
        }
        // relink URIs, as the previous a-img tags weren't recognized by this function
        $article = defaultLinkTo($article, $item['uri']);

        // remove unwanted stuff
        $unwanted = 'figure.branding, a-ad, div.ho-text, noscript img, .opt-in__content-container';
        foreach ($article->find($unwanted) as $element) {
            $element->remove();
        }
        // reload html, as remove() is buggy
        $article = str_get_html($article->outertext);
        if (!$article) {
            // re-parsing failed; keep the item as delivered by the feed
            return $item;
        }

        $content = '';

        $header = $article->find('header.a-article-header', 0);
        if ($header) {
            $content .= implode('', $header->find('p, a-img img, figure img'));

            $authors = $header->find('.a-creator__names .a-creator__name');
            if ($authors) {
                $item['author'] = implode(', ', array_map(static function ($e) {
                    return $e->plaintext;
                }, $authors));
            }
        }

        $body = $article->find('.article-content', 0);
        if ($body) {
            $content .= implode('', $body->find(
                'p, h3, ul, table, pre, a-img img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption'
            ));
        }

        // do not wipe the feed-provided content when nothing was extracted
        if ($content !== '') {
            $item['content'] = $content;
        }

        foreach ($article->find('a-img img, a-bilderstrecke img, figure img') as $img) {
            if ($img->src) {
                $item['enclosures'][] = $img->src;
            }
        }

        return $item;
    }
}
|