2020-03-31 22:14:16 +03:00
|
|
|
<?php
|
|
|
|
// Raise PHP's max_execution_time limit to 300 for this script.
// NOTE(review): presumably needed because the bridge below downloads every
// listed article page individually (see handleNewsblock/handleArticle) — confirm.
ini_set('max_execution_time', '300');
|
|
|
|
/**
 * Bridge for the Bavarian regional news site nordbayern.de.
 *
 * collectData() loads the overview page of the selected region, follows the
 * first link of every article teaser found inside <main>, and turns each
 * article page into one feed item (uri, author, timestamp, title, content).
 * Articles containing the police-report marker sentence are only kept when
 * the 'policeReports' input is set.
 */
class NordbayernBridge extends BridgeAbstract {

    const MAINTAINER = 'schabi.org';
    const NAME = 'Nordbayern';
    const CACHE_TIMEOUT = 3600;
    const URI = 'https://www.nordbayern.de';
    const DESCRIPTION = 'Bridge for Bavarian regional news site nordbayern.de';
    const PARAMETERS = array( array(
        'region' => array(
            'name' => 'region',
            'type' => 'list',
            'exampleValue' => 'Nürnberg',
            'title' => 'Select a region',
            'values' => array(
                'Nürnberg' => 'nuernberg',
                'Fürth' => 'fuerth',
                'Erlangen' => 'erlangen',
                'Altdorf' => 'altdorf',
                'Ansbach' => 'ansbach',
                'Bad Windsheim' => 'bad-windsheim',
                'Bamberg' => 'bamberg',
                'Dinkelsbühl/Feuchtwangen' => 'dinkelsbuehl-feuchtwangen',
                'Feucht' => 'feucht',
                'Forchheim' => 'forchheim',
                'Gunzenhausen' => 'gunzenhausen',
                'Hersbruck' => 'hersbruck',
                'Herzogenaurach' => 'herzogenaurach',
                'Hilpoltstein' => 'hilpoltstein',
                'Höchstadt' => 'hoechstadt',
                'Lauf' => 'lauf',
                'Neumarkt' => 'neumarkt',
                'Neustadt/Aisch' => 'neustadt-aisch',
                'Pegnitz' => 'pegnitz',
                'Roth' => 'roth',
                'Rothenburg o.d.T.' => 'rothenburg-o-d-t',
                'Treuchtlingen' => 'treuchtlingen',
                'Weißenburg' => 'weissenburg'
            )
        ),
        'policeReports' => array(
            'name' => 'Police Reports',
            'type' => 'checkbox',
            'exampleValue' => 'checked',
            'title' => 'Include Police Reports',
        )
    ));

    /**
     * Collect the readable markup from an article container.
     *
     * Only the direct children of $rawContent are inspected: <p> and <h3>
     * elements are appended verbatim, <header> elements are scanned
     * recursively, and for a <main> element the first nested <article> is
     * scanned recursively.
     *
     * @param object|null $rawContent DOM node to scan; null yields an empty string
     * @return string concatenated HTML of all kept elements
     */
    private function getUseFullContent($rawContent) {
        // A lookup that found nothing hands us null; treat it as "no content"
        // instead of failing on ->children.
        if ($rawContent === null) {
            return '';
        }

        $content = '';
        foreach ($rawContent->children as $element) {
            if ($element->tag === 'p' || $element->tag === 'h3') {
                $content .= $element;
            }
            if ($element->tag === 'main') {
                $content .= $this->getUseFullContent($element->find('article', 0));
            }
            if ($element->tag === 'header') {
                $content .= $this->getUseFullContent($element);
            }
        }

        return $content;
    }

    /**
     * Extract the image URLs of the given <picture> elements.
     *
     * The first <img> of every picture is used. Pictures without an <img>
     * are skipped, and so is the logo-vnp.png site logo.
     *
     * @param array|null $pictures <picture> nodes as returned by find('picture')
     * @return array list of image sources in document order
     */
    private function getValidImages($pictures) {
        $images = array();
        if (empty($pictures)) {
            return $images;
        }

        foreach ($pictures as $picture) {
            $img = $picture->find('img', 0);
            if ($img === null) {
                // <picture> without an <img>: nothing to extract.
                continue;
            }

            $imgUrl = $img->src;
            if (strcmp($imgUrl, 'https://www.nordbayern.de/img/nb/logo-vnp.png') !== 0) {
                $images[] = $imgUrl;
            }
        }

        return $images;
    }

    /**
     * Download a single article and append it to $this->items.
     *
     * Author, timestamp and title are only set when the corresponding
     * element exists on the page (and, for the timestamp, when the date
     * text can be parsed). The item is dropped when it contains the
     * police-report marker sentence and the 'policeReports' input is unset.
     *
     * @param string $link absolute URL of the article page
     * @return void
     */
    private function handleArticle($link) {
        $item = array();
        $article = getSimpleHTMLDOM($link);
        defaultLinkTo($article, self::URI);

        $item['uri'] = $link;

        $author = $article->find('[class=article__author extrabold]', 0);
        if ($author !== null) {
            $item['author'] = $author->plaintext;
        }

        $release = $article->find('[class=article__release]', 0);
        if ($release !== null) {
            // The release text carries a trailing "Uhr" that strtotime() cannot parse.
            $timestamp = strtotime(str_replace('Uhr', '', $release->plaintext));
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }
        }

        // Prefer the <h2> headline and fall back to the first <h3>.
        $headline = $article->find('h2', 0);
        if ($headline === null) {
            $headline = $article->find('h3', 0);
        }
        if ($headline !== null) {
            $item['title'] = $headline->innertext;
        }

        $item['content'] = '';

        // First get the images from the content.
        $images = $this->getValidImages($article->find('picture'));
        if (!empty($images)) {
            // The first image is always the NN logo, so it is skipped.
            // If there is an author info block, the next image is the
            // portrait of the author and not the article banner; the banner
            // is then the image after that.
            $bannerIndex = $article->find('a[id="openAuthor"]', 0) === null ? 1 : 2;
            if (isset($images[$bannerIndex])) {
                $item['content'] .= '<img src="' . $images[$bannerIndex] . '">';
            }
        }

        $richtext = $article->find('section[class*=article__richtext]', 0);
        if ($richtext === null) {
            // No rich-text section: use the first paragraph of the teaser module.
            $teaser = $article->find('div[class*=modul__teaser]', 0);
            $paragraph = $teaser === null ? null : $teaser->find('p', 0);
            if ($paragraph !== null) {
                $item['content'] .= $paragraph;
            }
        } else {
            // The article text lives in the first <div> inside the first <div>.
            $outer = $richtext->find('div', 0);
            $inner = $outer === null ? null : $outer->find('div', 0);
            $item['content'] .= $this->getUseFullContent($inner);
        }

        // Append every image except the leading logo below the text.
        foreach (array_slice($images, 1) as $imageUrl) {
            $item['content'] .= '<img src="' . $imageUrl . '">';
        }

        // Exclude police reports if desired.
        if ($this->getInput('policeReports') ||
            !str_contains($item['content'], 'Hier geht es zu allen aktuellen Polizeimeldungen.')) {
            $this->items[] = $item;
        }

        $article->clear();
    }

    /**
     * Process every article teaser inside the <main> element of a listing page.
     *
     * For each <article>, the href of its first <a> is appended to self::URI
     * and passed to handleArticle(). Teasers without a link are skipped.
     *
     * @param object $listSite parsed region overview page
     * @return void
     * @throws \Exception when the page has no <main> element
     */
    private function handleNewsblock($listSite) {
        $main = $listSite->find('main', 0);
        if ($main === null) {
            throw new \Exception('Could not find the <main> element on the region page');
        }

        foreach ($main->find('article') as $article) {
            $anchor = $article->find('a', 0);
            if ($anchor === null) {
                continue;
            }
            $this->handleArticle(self::URI . $anchor->href);
        }
    }

    /**
     * Entry point: load the overview page of the selected region and collect
     * its articles into $this->items.
     *
     * @return void
     */
    public function collectData() {
        $region = $this->getInput('region');
        // The list value for Rothenburg differs from the path used in the URL.
        if ($region === 'rothenburg-o-d-t') {
            $region = 'rothenburg-ob-der-tauber';
        }

        $listSite = getSimpleHTMLDOM(self::URI . '/region/' . $region);

        $this->handleNewsblock($listSite);
    }
}
|