From a6e8760726c468d48ea45a3bb94577b7b94c08a8 Mon Sep 17 00:00:00 2001 From: Sebastian Wolf <117176763+swofl@users.noreply.github.com> Date: Sat, 23 Nov 2024 18:54:21 +0100 Subject: [PATCH] [FragDenStaatBridge] add new bridge (#4330) --- bridges/FragDenStaatBridge.php | 78 ++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 bridges/FragDenStaatBridge.php diff --git a/bridges/FragDenStaatBridge.php b/bridges/FragDenStaatBridge.php new file mode 100644 index 00000000..aee1885c --- /dev/null +++ b/bridges/FragDenStaatBridge.php @@ -0,0 +1,78 @@ + [ + 'name' => 'Query Limit', + 'title' => 'Amount of articles to query', + 'type' => 'number', + 'defaultValue' => 5, + ], + ] ]; + + /** Builds one feed item array from a single teaser element. Title and link come from the first h3 > a anchor (the href is appended to the URI constant, so it is presumably site-relative - confirm), the only enclosure is the src of the first img, the uid is the SHA-256 hash of the title, and the timestamp is strtotime() of the datetime attribute on the first time element. NOTE(review): every find(..., 0) result is dereferenced without a null check, so a teaser lacking any of these elements fails here. @param mixed $teaser Element exposing find(); presumably a simple_html_dom node - confirm. @return array Item with the keys title, uri, enclosures, uid and timestamp. */ protected function parseTeaser($teaser) + { + $result = []; + + $header = $teaser->find('h3 > a', 0); + $result['title'] = $header->plaintext; + $result['uri'] = static::URI . $header->href; + $result['enclosures'] = []; + $result['enclosures'][] = $teaser->find('img', 0)->src; + $result['uid'] = hash('sha256', $result['title']); + $result['timestamp'] = strtotime($teaser->find('time', 0)->getAttribute('datetime')); + + return $result; + } + + /** Loads the /artikel/exklusiv/ listing, converts the first qLimit article elements into items via parseTeaser(), then loads each article page to fill in content (lead paragraph followed by the p and h3 children of div.blog-content) and author (comma-separated innertext of the a[rel=author] links), appending every item to $this->items. */ public function collectData() + { + $html = getSimpleHTMLDOM(self::URI . '/artikel/exklusiv/'); + + $queryLimit = (int) $this->getInput('qLimit'); + /* Clamp the requested amount to at most 12; presumably the number of teasers on the listing page - confirm. There is no lower bound. */ if ($queryLimit > 12) { + $queryLimit = 12; + } + + $teasers = []; + + $teaserElements = $html->find('article'); + + /* NOTE(review): $teaserElements[$i] is read without comparing $queryLimit to the number of matched article elements, so a listing with fewer matches hands an undefined entry to parseTeaser(). */ for ($i = 0; $i < $queryLimit; $i++) { + array_push($teasers, $this->parseTeaser($teaserElements[$i])); + } + + foreach ($teasers as $article) { + /* Article pages go through getSimpleHTMLDOMCached() with six times CACHE_TIMEOUT, while the listing above is fetched with the plain getSimpleHTMLDOM() call. */ $articleHtml = getSimpleHTMLDOMCached($article['uri'], static::CACHE_TIMEOUT * 6); + $articleCore = $articleHtml->find('article.blog-article', 0); + + $content = ''; + + /* NOTE(review): the find() results for article.blog-article and div.lead > p are used without null checks. */ $lead = $articleCore->find('div.lead > p', 0)->innertext; + + $content .= '

' . $lead . '

'; + + foreach ($articleCore->find('div.blog-content > p, div.blog-content > h3') as $paragraph) { + $content .= $paragraph->outertext; + } + + $article['content'] = '' . $content; + + $article['author'] = ''; + + foreach ($articleCore->find('a[rel="author"]') as $author) { + $article['author'] .= $author->innertext . ', '; + } + + $article['author'] = rtrim($article['author'], ', '); + + $this->items[] = $article; + } + } +}