<?php

/**
 * Bridge for The Economist.
 *
 * Reads the RSS feed of the selected topic or blog and enriches every feed
 * item with data scraped from the article page: the cleaned article body,
 * the lead image (as enclosure) and the subheadline (as category).
 */
class EconomistBridge extends FeedExpander
{
    const MAINTAINER = 'bockiii';
    const NAME = 'Economist Bridge';
    const URI = 'https://www.economist.com/';
    const CACHE_TIMEOUT = 3600; //1hour
    const DESCRIPTION = 'Returns the latest articles for the selected category';

    const PARAMETERS = [
        'global' => [
            'limit' => [
                'name' => 'Feed Item Limit',
                'required' => true,
                'type' => 'number',
                'defaultValue' => 10,
                'title' => 'Maximum number of returned feed items. Maximum 30, default 10'
            ]
        ],
        'Topics' => [
            'topic' => [
                'name' => 'Topics',
                'type' => 'list',
                'title' => 'Select a Topic',
                'defaultValue' => 'latest',
                'values' => [
                    'Latest' => 'latest',
                    'The world this week' => 'the-world-this-week',
                    'Letters' => 'letters',
                    'Leaders' => 'leaders',
                    'Briefings' => 'briefing',
                    'Special reports' => 'special-report',
                    'Britain' => 'britain',
                    'Europe' => 'europe',
                    'United States' => 'united-states',
                    'The Americas' => 'the-americas',
                    'Middle East and Africa' => 'middle-east-and-africa',
                    'Asia' => 'asia',
                    'China' => 'china',
                    'International' => 'international',
                    'Business' => 'business',
                    'Finance and economics' => 'finance-and-economics',
                    'Science and technology' => 'science-and-technology',
                    'Books and arts' => 'books-and-arts',
                    'Obituaries' => 'obituary',
                    'Graphic detail' => 'graphic-detail',
                    'Indicators' => 'economic-and-financial-indicators',
                ]
            ]
        ],
        'Blogs' => [
            'blog' => [
                'name' => 'Blogs',
                'type' => 'list',
                'title' => 'Select a Blog',
                'values' => [
                    'Bagehots notebook' => 'bagehots-notebook',
                    'Bartleby' => 'bartleby',
                    'Buttonwoods notebook' => 'buttonwoods-notebook',
                    'Charlemagnes notebook' => 'charlemagnes-notebook',
                    'Democracy in America' => 'democracy-in-america',
                    'Erasmus' => 'erasmus',
                    'Free exchange' => 'free-exchange',
                    'Game theory' => 'game-theory',
                    'Gulliver' => 'gulliver',
                    'Kaffeeklatsch' => 'kaffeeklatsch',
                    'Prospero' => 'prospero',
                    'The Economist Explains' => 'the-economist-explains',
                ]
            ]
        ]
    ];

    // Bounds for the user supplied 'limit' parameter; they mirror the values
    // advertised in the parameter's title above.
    const MAX_ITEMS = 30;
    const DEFAULT_ITEMS = 10;

    /**
     * Collects the feed items of the selected topic or blog.
     *
     * The category slug is taken from the 'topic' or 'blog' input depending on
     * the queried context and falls back to 'latest'. The item limit is forced
     * into the range 1..MAX_ITEMS so the documented maximum always holds, and a
     * missing or non-numeric limit falls back to DEFAULT_ITEMS.
     */
    public function collectData()
    {
        // get if topics or blogs were selected and store the selected category
        switch ($this->queriedContext) {
            case 'Topics':
                $category = $this->getInput('topic');
                break;
            case 'Blogs':
                $category = $this->getInput('blog');
                break;
            default:
                $category = 'latest';
        }

        // Casting a missing limit to int would silently yield 0, so the
        // default is applied explicitly before clamping.
        $requested = $this->getInput('limit');
        $limit = is_numeric($requested) ? (int)$requested : self::DEFAULT_ITEMS;
        $limit = max(1, min($limit, self::MAX_ITEMS));

        $this->collectExpandableDatas(self::URI . $category . '/rss.xml', $limit);
    }

    /**
     * Expands one feed item with the content of its article page.
     *
     * @param mixed $feedItem Feed entry, handed to the parent implementation unchanged.
     * @return array|null The enriched item, or null when the article page
     *                    cannot be parsed or uses an unknown layout.
     */
    protected function parseItem($feedItem)
    {
        $item = parent::parseItem($feedItem);
        $article = getSimpleHTMLDOM($item['uri']);
        if (!$article) {
            return null;
        }

        // Without an <article> element the layout cannot be determined; this
        // is handled like any other unsupported layout.
        $articleElement = $article->find('article', 0);
        if (is_null($articleElement)) {
            return null;
        }

        // We need to distinguish between old style and new style articles
        $layout = $articleElement->getAttribute('data-test-id');
        if ($layout === 'Article') {
            $contentNode = 'div.layout-article-body';
            $imgNode = 'div.article__lead-image';
            $categoryNode = 'span.article__subheadline';
        } elseif ($layout === 'NewArticle') {
            $contentNode = 'section';
            $imgNode = 'figure.css-12eysrk.e3y6nua0';
            $categoryNode = 'span.ern1uyf0';
        } else {
            return null;
        }

        // before the article can be added, it needs to be cleaned up, thus, the extra function.
        // The content produced by the parent is kept when no article body is found.
        $content = $this->cleanContent($article, $contentNode);
        if ($content !== '') {
            $item['content'] = $content;
        }

        // only the article lead image is retained if it's there; no enclosure
        // is added otherwise (an empty string is not a usable enclosure URL)
        $leadImage = $article->find($imgNode, 0);
        $image = is_null($leadImage) ? null : $leadImage->find('img', 0);
        if (!is_null($image)) {
            $src = $image->getAttribute('src');
            if (is_string($src) && $src !== '') {
                $item['enclosures'][] = $src;
            }
        }

        // add the subheadline as category. This will create a link in new articles
        // and a text in old articles
        $subheadline = $article->find($categoryNode, 0);
        if (!is_null($subheadline)) {
            $item['categories'][] = $subheadline->innertext;
        }

        return $item;
    }

    /**
     * Extracts and cleans the article body.
     *
     * @param object $article Parsed article page as returned by getSimpleHTMLDOM().
     * @param string $contentNode Selector of the element holding the article body.
     * @return string Cleaned HTML of the article body, or an empty string when
     *                the selector matches nothing.
     */
    private function cleanContent($article, $contentNode)
    {
        // the actual article is in this element
        $body = $article->find($contentNode, 0);
        if (is_null($body)) {
            return '';
        }
        $content = $body->innertext;

        // clean the article content. Remove all div's since the text is in paragraph elements
        $content = stripRecursiveHTMLSection($content, 'div', '<div ');

        // now remove embedded iframes. The podcast postings contain these for example.
        // The 's' modifier lets '.' match newlines so multi-line iframes are removed too.
        $content = preg_replace('/<iframe.*?\/iframe>/is', '', $content);

        // fix the relative links
        return defaultLinkTo($content, $this->getURI());
    }
}