[
'lang' => [
'name' => 'Language',
'type' => 'list',
'defaultValue' => 'www.euronews.com',
'values' => [
'English' => 'www.euronews.com',
'French' => 'fr.euronews.com',
'German' => 'de.euronews.com',
'Italian' => 'it.euronews.com',
'Spanish' => 'es.euronews.com',
'Portuguese' => 'pt.euronews.com',
'Russian' => 'ru.euronews.com',
'Turkish' => 'tr.euronews.com',
'Greek' => 'gr.euronews.com',
'Hungarian' => 'hu.euronews.com',
'Persian' => 'per.euronews.com',
'Arabic' => 'arabic.euronews.com',
/* These versions don't have timeline.json */
// 'Albanian' => 'euronews.al',
// 'Romanian' => 'euronews.ro',
// 'Georigian' => 'euronewsgeorgia.com',
// 'Bulgarian' => 'euronewsbulgaria.com'
// 'Serbian' => 'euronews.rs'
]
],
'limit' => [
'name' => 'Limit of items per feed',
'required' => true,
'type' => 'number',
'defaultValue' => 10,
'title' => 'Maximum number of returned feed items. Maximum 50, default 10'
],
]
];
public function collectData()
{
$limit = $this->getInput('limit');
$lang = $this->getInput('lang');
if ($lang === 'euronews.com') {
$lang = 'www.euronews.com';
}
$root_url = 'https://' . $lang;
$url = $root_url . '/api/timeline.json?limit=' . $limit;
$json = getContents($url);
$data = json_decode($json, true);
foreach ($data as $datum) {
$datum_uri = $root_url . $datum['path'];
$url_datum = $this->getItemContent($datum_uri);
$categories = [];
if (array_key_exists('program', $datum)) {
if (array_key_exists('title', $datum['program'])) {
$categories[] = $datum['program']['title'];
}
}
if (array_key_exists('themes', $datum)) {
foreach ($datum['themes'] as $theme) {
$categories[] = $theme['title'];
}
}
$item = [
'uri' => $datum_uri,
'title' => $datum['title'],
'uid' => strval($datum['id']),
'timestamp' => $datum['publishedAt'],
'content' => $url_datum['content'],
'author' => $url_datum['author'],
'enclosures' => $url_datum['enclosures'],
'categories' => array_unique($categories)
];
$this->items[] = $item;
}
}
private function getItemContent($url)
{
try {
$html = getSimpleHTMLDOMCached($url);
} catch (Exception $e) {
// Every once in a while it fails with too many redirects
return ['author' => null, 'content' => null, 'enclosures' => null];
}
$data = $html->find('script[type="application/ld+json"]', 0)->innertext;
$json = json_decode($data, true);
$author = 'Euronews';
$content = '';
$enclosures = [];
if (array_key_exists('@graph', $json)) {
foreach ($json['@graph'] as $item) {
if ($item['@type'] == 'NewsArticle') {
if (array_key_exists('author', $item)) {
$author = $item['author']['name'];
}
if (array_key_exists('image', $item)) {
$content .= '
';
}
if (array_key_exists('video', $item)) {
$enclosures[] = $item['video']['contentUrl'];
}
}
}
}
// Normal article
$article_content = $html->find('.c-article-content', 0);
if ($article_content) {
// Usually the .c-article-content is the root of the
// content, but once in a blue moon the root is the second
// div
if (
(count($article_content->children()) == 2)
&& ($article_content->children(1)->tag == 'div')
) {
$article_content = $article_content->children(1);
}
// The content is interspersed with links and stuff, so we
// iterate over the children
foreach ($article_content->children() as $element) {
if ($element->tag == 'p') {
$scribble_live = $element->find('#scribblelive-items', 0);
if (is_null($scribble_live)) {
// A normal paragraph
$content .= '
' . $element->innertext . '
'; } else { // LIVE mode foreach ($scribble_live->children() as $child) { if ($child->tag == 'div') { $content .= '