2022-05-08 04:08:55 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
 * Bridge for golem.de that replaces the feed's intro text with the full article.
 *
 * The selected category feed is handed to the FeedExpander machinery; for each
 * item, parseItem() downloads the article page(s) and extractContent() reduces
 * each page to the header and body elements that are worth keeping.
 */
class GolemBridge extends FeedExpander
{
    const MAINTAINER = 'Mynacol';
    const NAME = 'Golem Bridge';
    const URI = 'https://www.golem.de/';
    const CACHE_TIMEOUT = 1800; // 30min
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const PARAMETERS = [[
        // Display name => feed URL handed to collectExpandableDatas()
        'category' => [
            'name' => 'Category',
            'type' => 'list',
            'values' => [
                'Alle News'
                    => 'https://rss.golem.de/rss.php?feed=ATOM1.0',
                'Audio/Video'
                    => 'https://rss.golem.de/rss.php?ms=audio-video&feed=ATOM1.0',
                'Auto'
                    => 'https://rss.golem.de/rss.php?ms=auto&feed=ATOM1.0',
                'Foto'
                    => 'https://rss.golem.de/rss.php?ms=foto&feed=ATOM1.0',
                'Games'
                    => 'https://rss.golem.de/rss.php?ms=games&feed=ATOM1.0',
                'Handy'
                    => 'https://rss.golem.de/rss.php?ms=handy&feed=ATOM1.0',
                'Internet'
                    => 'https://rss.golem.de/rss.php?ms=internet&feed=ATOM1.0',
                'Mobil'
                    => 'https://rss.golem.de/rss.php?ms=mobil&feed=ATOM1.0',
                'Open Source'
                    => 'https://rss.golem.de/rss.php?ms=open-source&feed=ATOM1.0',
                'Politik/Recht'
                    => 'https://rss.golem.de/rss.php?ms=politik-recht&feed=ATOM1.0',
                'Security'
                    => 'https://rss.golem.de/rss.php?ms=security&feed=ATOM1.0',
                'Desktop-Applikationen'
                    => 'https://rss.golem.de/rss.php?ms=desktop-applikationen&feed=ATOM1.0',
                'Software-Entwicklung'
                    => 'https://rss.golem.de/rss.php?ms=softwareentwicklung&feed=ATOM1.0',
                'Wirtschaft'
                    => 'https://rss.golem.de/rss.php?ms=wirtschaft&feed=ATOM1.0',
                'Wissenschaft'
                    => 'https://rss.golem.de/rss.php?ms=wissenschaft&feed=ATOM1.0'
            ]
        ],
        'limit' => [
            'name' => 'Limit',
            'type' => 'number',
            'required' => false,
            'title' => 'Specify number of full articles to return',
            'defaultValue' => 5
        ]
    ]];
    // Fallback item count used when the 'limit' input is empty or 0.
    const LIMIT = 5;
    // Sent with every article request.
    // NOTE(review): presumably a pre-set consent cookie so the article page is served
    // instead of a consent dialog - confirm against the site.
    const HEADERS = ['Cookie: golem_consent20=simple|220101;'];

    /**
     * Expands the selected category feed, limited to the requested number of items.
     *
     * A falsy 'limit' input (unset, empty, 0) falls back to static::LIMIT.
     */
    public function collectData()
    {
        $this->collectExpandableDatas(
            $this->getInput('category'),
            $this->getInput('limit') ?: static::LIMIT
        );
    }

    /**
     * Replaces a feed item's content with the full article text.
     *
     * Starting at $item['uri'], every page of the article is fetched (following
     * <link rel="next">), its extracted content is appended to $item['content'],
     * and the tag list of the page(s) is stored in $item['categories'].
     *
     * @param array $item Feed item; must contain 'uri', may contain 'content'.
     * @return array The item with 'uri', 'content' and (if tags were found) 'categories' updated.
     */
    protected function parseItem(array $item)
    {
        $item['content'] ??= '';
        $uri = $item['uri'];

        $visited = [];
        $categories = [];

        while ($uri) {
            if (isset($visited[$uri])) {
                // Prevent an infinite loop when pages link to each other in a cycle
                break;
            }
            $visited[$uri] = true;

            $articlePage = getSimpleHTMLDOMCached($uri, static::CACHE_TIMEOUT, static::HEADERS);

            // URI without RSS feed reference; keep the previous URI when the meta tag is absent
            $cleanUri = $articlePage->find('head meta[name="twitter:url"]', 0);
            if ($cleanUri !== null) {
                $item['uri'] = $cleanUri->content;
            }

            // Tags accumulate over all pages of the article and are de-duplicated
            foreach ($articlePage->find('ul.tags__list li') as $category) {
                $categories[] = trim(html_entity_decode($category->plaintext));
            }
            if ($categories !== []) {
                $item['categories'] = array_unique($categories);
            }

            $item['content'] .= $this->extractContent($articlePage);

            // next page
            $nextUri = $articlePage->find('link[rel="next"]', 0);
            $uri = $nextUri ? static::URI . $nextUri->href : null;
        }

        return $item;
    }

    /**
     * Reduces one article page to the HTML that should end up in the feed item.
     *
     * Steps: rebuild embedded YouTube/Golem players as plain <iframe>/<video>
     * markup, drop known unwanted elements, then concatenate the header's
     * paragraphs/figures and the body's paragraphs, headings, images and players.
     * Parts of the page that cannot be found are skipped instead of raising an error.
     *
     * @param object $page Parsed article page (simple_html_dom document as returned by getSimpleHTMLDOMCached()).
     * @return string HTML fragment; empty string when the page has no <article>.
     */
    private function extractContent($page)
    {
        $item = '';

        $article = $page->find('article', 0);
        if ($article === null) {
            return $item;
        }

        // build youtube iframes
        foreach ($article->find('.embedcontent') as $embedcontent) {
            $ytscript = $embedcontent->find('script', 0);
            if ($ytscript === null) {
                continue;
            }
            if (preg_match('/(www\.youtube\.com.*?)"/', $ytscript->innertext, $match)) {
                // the URL is embedded in a script, so strip the escaping backslashes
                $link = 'https://' . str_replace('\\', '', $match[1]);
                $embedcontent->innertext .= <<<EOT
                <iframe width="560" height="315" src="$link" title="YouTube video player" frameborder="0"
                allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
                referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
                EOT;
            }
        }

        // build golem videos
        foreach ($article->find('.gvideofig') as $embedcontent) {
            if (preg_match('/gvideo_(.*)/', $embedcontent->id, $videoid)) {
                $embedcontent->innertext .= <<<EOT
                <video class="rmp-object-fit-contain rmp-video" x-webkit-airplay="allow" controlslist="nodownload" tabindex="-1"
                preload="metadata" src="https://video.golem.de/download/{$videoid[1]}"></video>
                EOT;
            }
        }

        // delete known bad elements
        // The players appended above are still plain text inside innertext at this point,
        // so the 'iframe' selector only hits iframes that were part of the original page.
        $badSelector = 'div[id*="adtile"], #job-market, #seminars, iframe, div.gbox_affiliate, div.toc';
        foreach ($article->find($badSelector) as $bad) {
            $bad->remove();
        }
        // reload html, as remove() is buggy (this also turns the appended players into real nodes)
        $article = str_get_html($article->outertext);
        if (!$article) {
            return $item;
        }

        $header = $article->find('header', 0);
        if ($header !== null) {
            foreach ($header->find('p, figure') as $element) {
                $item .= $element;
            }
        }

        $content = $article->find('div.formatted', 0);
        if ($content === null) {
            return $item;
        }

        // full image quality
        foreach ($content->find('img[data-src-full][src*="."]') as $img) {
            $img->src = $img->getAttribute('data-src-full');
        }

        foreach ($content->find('p, h1, h2, h3, img[src*="."], iframe, video') as $element) {
            $item .= $element;
        }

        return $item;
    }
}
|