2019-04-04 23:54:08 +03:00
|
|
|
<?php
|
|
|
|
/**
 * Bridge that builds a feed from The Economist's "latest" page.
 *
 * The listing page is fetched once; every teaser found on it is then resolved
 * to its full article page, which is scraped for title, author, publication
 * time, section and body.
 */
class EconomistBridge extends BridgeAbstract {

    const NAME = 'The Economist: Latest Updates';
    const URI = 'https://www.economist.com';
    const DESCRIPTION = 'Fetches the latest updates from the Economist.';
    const MAINTAINER = 'thefranke';
    const CACHE_TIMEOUT = 3600; // 1h

    /**
     * Returns the URL of the icon shown for this bridge.
     *
     * @return string Absolute URL of the favicon.
     */
    public function getIcon() {
        return 'https://www.economist.com/sites/default/files/econfinal_favicon.ico';
    }

    /**
     * Collects up to 10 articles from the "latest" listing into $this->items.
     *
     * Teasers without a usable link, and articles whose page cannot be loaded
     * or lacks a headline/body, are skipped instead of aborting the whole feed.
     *
     * @return void
     */
    public function collectData() {
        $html = getSimpleHTMLDOM(self::URI . '/latest/')
            or returnServerError('Could not fetch latest updates from The Economist.');

        foreach($html->find('div.teaser') as $element) {
            $a = $element->find('a.headline-link', 0);

            // A teaser without a headline link cannot be resolved to an article.
            if (!$a || !$a->href)
                continue;

            $href = $a->href;

            // Links that do not start with "http" are treated as site-relative.
            if (substr($href, 0, 4) !== 'http')
                $href = self::URI . $href;

            $item = $this->buildArticleItem($href);

            if ($item === null)
                continue;

            $this->items[] = $item;

            // Every item costs one article request, so cap the feed at 10 entries.
            if (count($this->items) >= 10)
                break;
        }
    }

    /**
     * Loads a single article page and converts it into a feed item.
     *
     * @param string $href Absolute URL of the article.
     * @return array|null Feed item with the keys title, uri, author, categories
     *                    and content (plus timestamp when a parsable date is
     *                    present), or null when the page has no usable article.
     */
    private function buildArticleItem($href) {
        $full = getSimpleHTMLDOMCached($href);

        if (!$full)
            return null;

        $article = $full->find('article', 0);

        if (!$article)
            return null;

        $header = $article->find('span[itemprop="headline"]', 0);
        $content = $article->find('div[itemprop="text"]', 0);

        // Without a headline or a body there is nothing meaningful to publish.
        if (!$header || !$content)
            return null;

        // Everything below is optional; each lookup may come back empty.
        $imagewrapper = $article->find('div[itemprop="image"]', 0);
        $headerimg = $imagewrapper ? $imagewrapper->find('img', 0) : null;
        $byline = $article->find('p[itemprop="byline"]', 0);
        $time = $article->find('time', 0);
        $sectionnode = $article->find('strong[itemprop="articleSection"]', 0);

        // Author: drop the first three characters of the byline (presumably a
        // leading "By " - confirm against the live markup). Fall back to the
        // publication name when there is no byline or nothing is left of it.
        $author = 'The Economist';

        if ($byline) {
            $stripped = substr($byline->innertext, 3);

            if ($stripped !== false && $stripped !== '')
                $author = $stripped;
        }

        // Blank out, in this order: the newsletter subscription message, the
        // first form in the body (the newsletter form), and the next/previous
        // article links at the bottom.
        $clutter = array(
            'div[class="newsletter-form__message"]',
            'form',
            'div[class="blog-post__next-previous-wrapper"]',
        );

        foreach($clutter as $selector) {
            $node = $content->find($selector, 0);

            if ($node)
                $node->outertext = '';
        }

        $item = array();
        $item['title'] = $header->innertext;
        $item['uri'] = $href;

        // Only set a timestamp when the page carries a date strtotime() accepts.
        if ($time && $time->datetime) {
            $timestamp = strtotime($time->datetime);

            if ($timestamp !== false)
                $item['timestamp'] = $timestamp;
        }

        $item['author'] = $author;
        $item['categories'] = $sectionnode ? array($sectionnode->plaintext) : array();

        // Put the header image in front of the body when the article has one.
        $image = '';

        if ($headerimg && $headerimg->src)
            $image = '<img style="max-width: 100%" src="' . $headerimg->src . '">';

        $item['content'] = $image . $content->innertext;

        return $item;
    }
}
|