array( 'name' => 'Category', 'type' => 'list', 'values' => array( 'Alle News' => 'https://rss.golem.de/rss.php?feed=ATOM1.0', 'Audio/Video' => 'https://rss.golem.de/rss.php?ms=audio-video&feed=ATOM1.0', 'Auto' => 'https://rss.golem.de/rss.php?ms=auto&feed=ATOM1.0', 'Foto' => 'https://rss.golem.de/rss.php?ms=foto&feed=ATOM1.0', 'Games' => 'https://rss.golem.de/rss.php?ms=games&feed=ATOM1.0', 'Handy' => 'https://rss.golem.de/rss.php?ms=handy&feed=ATOM1.0', 'Internet' => 'https://rss.golem.de/rss.php?ms=internet&feed=ATOM1.0', 'Mobil' => 'https://rss.golem.de/rss.php?ms=mobil&feed=ATOM1.0', 'Open Source' => 'https://rss.golem.de/rss.php?ms=open-source&feed=ATOM1.0', 'Politik/Recht' => 'https://rss.golem.de/rss.php?ms=politik-recht&feed=ATOM1.0', 'Security' => 'https://rss.golem.de/rss.php?ms=security&feed=ATOM1.0', 'Desktop-Applikationen' => 'https://rss.golem.de/rss.php?ms=desktop-applikationen&feed=ATOM1.0', 'Software-Entwicklung' => 'https://rss.golem.de/rss.php?ms=softwareentwicklung&feed=ATOM1.0', 'Wirtschaft' => 'https://rss.golem.de/rss.php?ms=wirtschaft&feed=ATOM1.0', 'Wissenschaft' => 'https://rss.golem.de/rss.php?ms=wissenschaft&feed=ATOM1.0' ) ), 'limit' => array( 'name' => 'Limit', 'type' => 'number', 'required' => false, 'title' => 'Specify number of full articles to return', 'defaultValue' => 5 ) )); const LIMIT = 5; const HEADERS = array('Cookie: golem_consent20=simple|220101;'); public function collectData() { $this->collectExpandableDatas( $this->getInput('category'), $this->getInput('limit') ?: static::LIMIT ); } protected function parseItem($item) { $item = parent::parseItem($item); $item['content'] = $item['content'] ?? ''; $uri = $item['uri']; while ($uri) { $articlePage = getSimpleHTMLDOMCached($uri, static::CACHE_TIMEOUT, static::HEADERS); // URI without RSS feed reference $item['uri'] = $articlePage->find('head meta[name="twitter:url"]', 0)->content; $author = $articlePage->find('article header .authors .authors__name', 0); if ($author) { $item['author'] = $author->innertext; } $item['content'] .= $this->extractContent($articlePage); // next page $nextUri = $articlePage->find('link[rel="next"]', 0); $uri = $nextUri ? static::URI . $nextUri->href : null; } return $item; } private function extractContent($page) { $item = ''; $article = $page->find('article', 0); // delete known bad elements foreach($article->find('div[id*="adtile"], #job-market, #seminars, div.gbox_affiliate, div.toc, .embedcontent') as $bad) { $bad->remove(); } // reload html, as remove() is buggy $article = str_get_html($article->outertext); if ($pageHeader = $article->find('header.paged-cluster-header h1', 0)) { $item .= $pageHeader; } $header = $article->find('header', 0); foreach($header->find('p, figure') as $element) { $item .= $element; } $content = $article->find('div.formatted', 0); // full image quality foreach($content->find('img[data-src-full][src*="."]') as $img) { $img->src = $img->getAttribute('data-src-full'); } foreach($content->find('p, h1, h3, img[src*="."]') as $element) { $item .= $element; } return $item; } }