2020-03-31 22:14:16 +03:00
|
|
|
<?php
|
|
|
|
ini_set('max_execution_time', '300');
|
|
|
|
class NordbayernBridge extends BridgeAbstract {
|
|
|
|
|
|
|
|
const MAINTAINER = 'schabi.org';
|
|
|
|
const NAME = 'Nordbayern Bridge';
|
|
|
|
const CACHE_TIMEOUT = 3600;
|
|
|
|
const URI = 'https://www.nordbayern.de';
|
|
|
|
const DESCRIPTION = 'Bridge for Bavarian reginoal news site nordbayern.de';
|
|
|
|
const PARAMETERS = array( array(
|
|
|
|
'region' => array(
|
|
|
|
'name' => 'region',
|
|
|
|
'type' => 'list',
|
|
|
|
'exampleValue' => 'Nürnberg',
|
|
|
|
'title' => 'Select a region',
|
|
|
|
'values' => array(
|
|
|
|
'Nürnberg' => 'nuernberg',
|
|
|
|
'Fürth' => 'fuerth',
|
|
|
|
'Altdorf' => 'altdorf',
|
|
|
|
'Ansbach' => 'ansbach',
|
|
|
|
'Bad Windsheim' => 'bad-windsheim',
|
|
|
|
'Bamberg' => 'bamberg',
|
|
|
|
'Dinkelsbühl/Feuchtwangen' => 'dinkelsbuehl-feuchtwangen',
|
|
|
|
'Feucht' => 'feucht',
|
|
|
|
'Forchheim' => 'forchheim',
|
|
|
|
'Gunzenhausen' => 'gunzenhausen',
|
|
|
|
'Hersbruck' => 'hersbruck',
|
|
|
|
'Herzogenaurach' => 'herzogenaurach',
|
2021-02-21 19:43:23 +03:00
|
|
|
'Hilpoltstein' => 'hilpoltstein',
|
2020-03-31 22:14:16 +03:00
|
|
|
'Höchstadt' => 'hoechstadt',
|
|
|
|
'Lauf' => 'lauf',
|
|
|
|
'Neumarkt' => 'neumarkt',
|
|
|
|
'Neustadt/Aisch' => 'neustadt-aisch',
|
|
|
|
'Pegnitz' => 'pegnitz',
|
|
|
|
'Roth' => 'roth',
|
|
|
|
'Rothenburg o.d.T.' => 'rothenburg-o-d-t',
|
|
|
|
'Schwabach' => 'schwabach',
|
|
|
|
'Treuchtlingen' => 'treuchtlingen',
|
|
|
|
'Weißenburg' => 'weissenburg'
|
|
|
|
)
|
|
|
|
),
|
|
|
|
'policeReports' => array(
|
|
|
|
'name' => 'Police Reports',
|
|
|
|
'type' => 'checkbox',
|
|
|
|
'exampleValue' => 'checked',
|
|
|
|
'title' => 'Read Police Reports',
|
|
|
|
)
|
|
|
|
));
|
|
|
|
|
|
|
|
private function getImageUrlFromScript($script) {
|
2020-10-15 10:53:19 +03:00
|
|
|
preg_match(
|
2021-05-19 11:44:17 +03:00
|
|
|
"#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9%_]*\\.(jpg|JPG|jpeg))#",
|
2020-10-15 10:53:19 +03:00
|
|
|
$script->innertext,
|
|
|
|
$matches,
|
|
|
|
PREG_OFFSET_CAPTURE
|
|
|
|
);
|
2020-03-31 22:14:16 +03:00
|
|
|
if(isset($matches[1][0])) {
|
|
|
|
return stripcslashes($matches[1][0]) . '?w=800';
|
|
|
|
}
|
2020-10-15 10:53:19 +03:00
|
|
|
return null;
|
2020-03-31 22:14:16 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
private function handleArticle($link) {
|
|
|
|
$item = array();
|
|
|
|
$article = getSimpleHTMLDOM($link);
|
|
|
|
$content = $article->find('div[class*=article-content]', 0);
|
2021-05-19 11:43:10 +03:00
|
|
|
if(is_null($content)) {
|
|
|
|
// If content is null its most likely a slideshow.
|
|
|
|
// we do not support slideshows right now as I (theScrabi)
|
|
|
|
// think there is to little informational value in these
|
|
|
|
// to actually add support to them.
|
|
|
|
return;
|
|
|
|
}
|
2020-03-31 22:14:16 +03:00
|
|
|
$item['uri'] = $link;
|
|
|
|
$item['title'] = $article->find('h1', 0)->innertext;
|
|
|
|
$item['content'] = '';
|
|
|
|
|
|
|
|
//first get image from block/modul
|
|
|
|
$figure = $article->find('figure[class*=panorama]', 0);
|
|
|
|
if($figure !== null) {
|
|
|
|
$imgUrl = self::getImageUrlFromScript($figure->find('script', 0));
|
|
|
|
if($imgUrl === null) {
|
|
|
|
$imgUrl = self::getImageUrlFromScript($figure->find('script', 1));
|
|
|
|
}
|
|
|
|
$item['content'] .= '<img src="' . $imgUrl . '">';
|
|
|
|
}
|
|
|
|
|
|
|
|
// get regular paragraphs
|
|
|
|
foreach($content->children() as $child) {
|
|
|
|
if($child->tag === 'p') {
|
|
|
|
$item['content'] .= $child;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//get image divs
|
|
|
|
foreach($content->find('div[class*=article-slideshow]') as $slides) {
|
|
|
|
foreach($slides->children() as $child) {
|
|
|
|
switch($child->tag) {
|
|
|
|
case 'p':
|
|
|
|
$item['content'] .= $child;
|
|
|
|
break;
|
|
|
|
case 'h5':
|
|
|
|
$item['content'] .= '<h5><a href="'
|
|
|
|
. self::URI . $child->find('a', 0)->href . '">' . $child->plaintext . '</a></h5>';
|
|
|
|
break;
|
|
|
|
case 'a':
|
|
|
|
$url = self::getImageUrlFromScript($child->find('script', 0));
|
|
|
|
$item['content'] .= '<img src="' . $url . '">';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
$this->items[] = $item;
|
|
|
|
$article->clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
private function handleNewsblock($listSite, $readPoliceReports) {
|
|
|
|
$newsBlocks = $listSite->find('section[class*=newsblock]');
|
2020-09-18 08:13:31 +03:00
|
|
|
$regionalNewsBlock = $newsBlocks[0];
|
|
|
|
$policeBlock = $newsBlocks[1];
|
|
|
|
foreach($regionalNewsBlock->find('h2') as $headline) {
|
|
|
|
self::handleArticle(self::URI . $headline->find('a', 0)->href);
|
|
|
|
}
|
2020-03-31 22:14:16 +03:00
|
|
|
if($readPoliceReports === true) {
|
|
|
|
foreach($policeBlock->find('h2') as $headline) {
|
|
|
|
self::handleArticle(self::URI . $headline->find('a', 0)->href);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public function collectData() {
|
|
|
|
$item = array();
|
|
|
|
$region = $this->getInput('region');
|
|
|
|
$listSite = getSimpleHTMLDOM(self::URI . '/region/' . $region);
|
|
|
|
|
|
|
|
self::handleNewsblock($listSite, $this->getInput('policeReports'));
|
|
|
|
}
|
|
|
|
}
|