mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-12-25 05:38:21 +03:00
66a6847fd0
* [DeutscheWelleBridge] Small URL fix. Reset the $item's uri value after removing the tracking query string. * [DeutscheWelleBridge] Fix "hero" images. The main "hero" image for each article has src="" and relies on the srcset attribute for the browser to pick the best image based on the actual displayed size. The call to `defaultLinkTo()` replaces the empty src with the article's link, which, not being an image, breaks the image. This change resets the src's of any such images back to "".
148 lines
5.5 KiB
PHP
148 lines
5.5 KiB
PHP
<?php
|
|
|
|
/**
 * Bridge for the Deutsche Welle (dw.com) Atom feeds.
 *
 * Reads the feed selected through the 'feed' parameter and replaces the
 * content of every entry with the teaser and article body scraped from the
 * page the entry links to.
 */
class DeutscheWelleBridge extends FeedExpander
{
    const MAINTAINER = 'No maintainer';
    const NAME = 'Deutsche Welle Bridge';
    const URI = 'https://www.dw.com';
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const CACHE_TIMEOUT = 3600;

    // Selectable feeds: display name => Atom feed URL.
    const PARAMETERS = [[
        'feed' => [
            'name' => 'feed',
            'type' => 'list',
            'values' => [
                'All Top Stories and News Updates'
                    => 'http://rss.dw.com/atom/rss-en-all',
                'Top Stories'
                    => 'http://rss.dw.com/atom/rss-en-top',
                'Germany'
                    => 'http://rss.dw.com/atom/rss-en-ger',
                'World'
                    => 'http://rss.dw.com/atom/rss-en-world',
                'Europe'
                    => 'http://rss.dw.com/atom/rss-en-eu',
                'Business'
                    => 'http://rss.dw.com/atom/rss-en-bus',
                'Science'
                    => 'http://rss.dw.com/atom/rss_en_science',
                'Environment'
                    => 'http://rss.dw.com/atom/rss_en_environment',
                'Culture & Lifestyle'
                    => 'http://rss.dw.com/atom/rss-en-cul',
                'Sports'
                    => 'http://rss.dw.com/atom/rss-en-sports',
                'Visit Germany'
                    => 'http://rss.dw.com/atom/rss-en-visitgermany',
                'Asia'
                    => 'http://rss.dw.com/atom/rss-en-asia',
                'Deutsche Welle Gesamt'
                    => 'http://rss.dw.com/atom/rss-de-all',
                'Themen des Tages'
                    => 'http://rss.dw.com/atom/rss-de-top',
                'Nachrichten'
                    => 'http://rss.dw.com/atom/rss-de-news',
                'Wissenschaft'
                    => 'http://rss.dw.com/atom/rss-de-wissenschaft',
                'Sport'
                    => 'http://rss.dw.com/atom/rss-de-sport',
                'Deutschland entdecken'
                    => 'http://rss.dw.com/atom/rss-de-deutschlandentdecken',
                'Presse'
                    => 'http://rss.dw.com/atom/presse',
                'Politik'
                    => 'http://rss.dw.com/atom/rss_de_politik',
                'Wirtschaft'
                    => 'http://rss.dw.com/atom/rss-de-eco',
                'Kultur & Leben'
                    => 'http://rss.dw.com/atom/rss-de-cul',
                'Kultur & Leben: Buch'
                    => 'http://rss.dw.com/atom/rss-de-cul-buch',
                'Kultur & Leben: Film'
                    => 'http://rss.dw.com/atom/rss-de-cul-film',
                'Kultur & Leben: Musik'
                    => 'http://rss.dw.com/atom/rss-de-cul-musik',
            ],
        ],
    ]];

    /**
     * Collects the items of the feed chosen via the 'feed' input.
     */
    public function collectData()
    {
        $this->collectExpandableDatas($this->getInput('feed'));
    }

    /**
     * Expands a single feed entry with the content of the linked article page.
     *
     * The query string is removed from the entry's 'uri' (it only carries
     * tracking data), the page is fetched, and 'content' is rebuilt from the
     * teaser plus the cleaned-up <article> element. 'author' is set when an
     * author link is present on the page.
     *
     * @param array $item Feed entry; its 'uri' key is read and rewritten.
     * @return array The entry, expanded when the article could be located,
     *               otherwise returned as far as it could be processed.
     */
    protected function parseItem(array $item)
    {
        $parsedUri = parse_url($item['uri'] ?? '');
        if ($parsedUri === false) {
            // Seriously malformed URL: there is nothing to fetch, keep the entry untouched.
            return $item;
        }

        // Drop the tracking query string and store the clean URL back on the item.
        unset($parsedUri['query']);
        $item['uri'] = $this->unparseUrl($parsedUri);

        $page = getSimpleHTMLDOM($item['uri']);
        $page = defaultLinkTo($page, $item['uri']);

        $article = $page->find('article', 0);
        if (is_null($article)) {
            // No <article> element on the page: keep the entry as delivered by the feed
            // instead of failing on a null dereference.
            return $item;
        }

        // author
        $author = $article->find('.author-link > span', 0);
        if ($author) {
            $item['author'] = $author->text();
        }

        // The teaser, when present, is placed in front of the article body.
        $teaser = $article->find('.teaser-text', 0);
        $item['content'] = is_null($teaser) ? '' : $teaser->outertext();

        // remove unneeded elements
        $unwanted = 'header, .advertisement, [data-tracking-name="sharing-icons-inline"], '
            . 'a.external-link > svg, picture > source, .vjs-wrapper, .dw-widget, footer';
        foreach ($article->find($unwanted) as $bad) {
            $bad->remove();
        }
        // reload html as remove() is buggy
        $article = str_get_html($article->outertext());

        // remove width and height values from img tags
        foreach ($article->find('img') as $img) {
            $img->width = null;
            $img->height = null;
        }

        // remove bad img src's added by defaultLinkTo() above
        // these images should have src="" and will then use
        // the srcset attribute to load the best image for the displayed size
        foreach ($article->find('figure > picture > img') as $img) {
            $img->src = '';
        }

        // replace lazy-loaded images
        foreach ($article->find('figure.placeholder-image') as $figure) {
            $img = $figure->find('img', 0);
            if (is_null($img)) {
                // Placeholder figure without an <img>: nothing to rewrite.
                continue;
            }
            // data-url is a template; substitute the '${formatId}' placeholder with '906'.
            $img->src = str_replace('${formatId}', '906', $img->getAttribute('data-url'));
            $img->style = null;
        }

        $item['content'] .= $article->save();

        return $item;
    }

    /**
     * Rebuilds a URL string from the component array produced by parse_url().
     *
     * Components that are not set are omitted. User and password are joined
     * as "user:pass@" in front of the host.
     *
     * @see https://www.php.net/manual/en/function.parse-url.php#106731
     *
     * @param array $parsed_url Components as returned by parse_url().
     * @return string The reassembled URL.
     */
    private function unparseUrl(array $parsed_url): string
    {
        $scheme   = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
        $host     = $parsed_url['host'] ?? '';
        $port     = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
        $user     = $parsed_url['user'] ?? '';
        // The password is separated from the user name by a colon.
        $pass     = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
        // Compare against '' rather than relying on truthiness so that a user
        // or password of '0' still produces the credentials part.
        $auth     = ($user !== '' || $pass !== '') ? "$user$pass@" : '';
        $path     = $parsed_url['path'] ?? '';
        $query    = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
        $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';

        return "$scheme$auth$host$port$path$query$fragment";
    }
}
|