<?php

/**
 * Bridge for the section listings of "Asahi Shimbun - Asia & Japan Watch".
 *
 * The selected section page is downloaded and every link matched by
 * `#MainInner li a` is turned into one feed item (uri, content, title and,
 * when a date is present and parseable, timestamp).
 */
class AsahiShimbunAJWBridge extends BridgeAbstract
{
    const NAME = 'Asahi Shimbun AJW';
    const BASE_URI = 'http://www.asahi.com';
    const URI = self::BASE_URI . '/ajw/';
    const DESCRIPTION = 'Asahi Shimbun - Asia & Japan Watch';
    const MAINTAINER = 'somini';
    const PARAMETERS = [
        [
            'section' => [
                'type' => 'list',
                'name' => 'Section',
                'values' => [
                    'Japan » Social Affairs' => 'japan/social',
                    'Japan » People' => 'japan/people',
                    'Japan » 3/11 Disaster' => 'japan/0311disaster',
                    'Japan » Sci & Tech' => 'japan/sci_tech',
                    'Politics' => 'politics',
                    'Business' => 'business',
                    'Culture » Style' => 'culture/style',
                    'Culture » Movies' => 'culture/movies',
                    'Culture » Manga & Anime' => 'culture/manga_anime',
                    'Asia » China' => 'asia_world/china',
                    'Asia » Korean Peninsula' => 'asia_world/korean_peninsula',
                    'Asia » Around Asia' => 'asia_world/around_asia',
                    'Asia » World' => 'asia_world/world',
                    'Opinion » Editorial' => 'opinion/editorial',
                    'Opinion » Vox Populi' => 'opinion/vox',
                ],
                'defaultValue' => 'politics',
            ],
        ],
    ];

    /**
     * Build the listing URL of a section.
     *
     * @param string $section Section path, e.g. one of the 'values' of the
     *                        'section' parameter such as 'japan/social'.
     * @return string Result of getURI() followed by the section path and a
     *                trailing slash.
     */
    private function getSectionURI($section)
    {
        // getURI() is inherited (its definition is not part of this file);
        // call it on the instance instead of with static-call syntax.
        return $this->getURI() . $section . '/';
    }

    /**
     * Fetch the selected section page and append one entry to $this->items
     * for every article link found in it.
     *
     * @return void
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM($this->getSectionURI($this->getInput('section')));

        foreach ($html->find('#MainInner li a') as $element) {
            // Strict comparison so that only the exact class string matches and
            // no type juggling can make a non-string attribute value compare equal.
            if ($element->parent()->class === 'HeadlineTopImage-S') {
                Debug::log('Skip Headline, it is repeated below');
                continue;
            }

            $item = [];
            $item['uri'] = self::BASE_URI . $element->href;

            // The helper spans handled below are blanked once read so that
            // they do not end up in the title taken from the link's inner
            // markup at the end of the loop body.
            $e_lead = $element->find('span.Lead', 0);
            if ($e_lead) {
                $item['content'] = $e_lead->innertext;
                $e_lead->outertext = '';
            } else {
                // No lead: fall back to the link's inner markup as it is at
                // this point, i.e. before the date/video spans are blanked.
                $item['content'] = $element->innertext;
            }

            $e_date = $element->find('span.EnDate', 0);
            if ($e_date) {
                // strtotime() returns false for text it cannot parse; leave the
                // timestamp unset in that case instead of storing false.
                $timestamp = strtotime($e_date->innertext);
                if ($timestamp !== false) {
                    $item['timestamp'] = $timestamp;
                }
                $e_date->outertext = '';
            }

            $e_video = $element->find('span.EnVideo', 0);
            if ($e_video) {
                $e_video->outertext = '';
                // Flag video entries in the title.
                $element->innertext = "VIDEO: $element->innertext";
            }

            $item['title'] = $element->innertext;

            $this->items[] = $item;
        }
    }
}