mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 17:45:40 +03:00
[ElsevierBridge] fix: broken bridge (#2575)
This commit is contained in:
parent
3e363bbc20
commit
25e9f69261
1 changed files with 18 additions and 55 deletions
|
@ -1,7 +1,7 @@
|
|||
<?php
|
||||
class ElsevierBridge extends BridgeAbstract {
|
||||
|
||||
const MAINTAINER = 'Pierre Mazière';
|
||||
const MAINTAINER = 'dvikan';
|
||||
const NAME = 'Elsevier journals recent articles';
|
||||
const URI = 'https://www.journals.elsevier.com/';
|
||||
const CACHE_TIMEOUT = 43200; //12h
|
||||
|
@ -16,63 +16,26 @@ class ElsevierBridge extends BridgeAbstract {
|
|||
)
|
||||
));
|
||||
|
||||
// Extracts the list of names from an article as string
|
||||
private function extractArticleName($article){
|
||||
$names = $article->find('small', 0);
|
||||
if($names)
|
||||
return trim($names->plaintext);
|
||||
return '';
|
||||
}
|
||||
|
||||
// Extracts the timestamp from an article
|
||||
private function extractArticleTimestamp($article){
|
||||
$time = $article->find('.article-info', 0);
|
||||
if($time) {
|
||||
$timestring = trim($time->plaintext);
|
||||
/*
|
||||
The format depends on the age of an article:
|
||||
- Available online 29 July 2016
|
||||
- July 2016
|
||||
- May–June 2016
|
||||
*/
|
||||
if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)) {
|
||||
return strtotime($matches[0]);
|
||||
} elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) {
|
||||
return strtotime($matches[0]);
|
||||
} elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) {
|
||||
return strtotime($matches[0]);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Extracts the content from an article
|
||||
private function extractArticleContent($article){
|
||||
$content = $article->find('.article-content', 0);
|
||||
if($content) {
|
||||
return trim($content->plaintext);
|
||||
}
|
||||
return '';
|
||||
}
|
||||
|
||||
public function getIcon() {
|
||||
return 'https://cdn.elsevier.io/verona/includes/favicons/favicon-32x32.png';
|
||||
}
|
||||
|
||||
public function collectData(){
|
||||
$uri = self::URI . $this->getInput('j') . '/recent-articles/';
|
||||
$html = getSimpleHTMLDOM($uri);
|
||||
// Not all journals have the /recent-articles page
|
||||
$url = sprintf('https://www.journals.elsevier.com/%s/recent-articles/', $this->getInput('j'));
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
|
||||
foreach($html->find('.pod-listing') as $article) {
|
||||
$item = array();
|
||||
$item['uri'] = $article->find('.pod-listing-header>a', 0)->getAttribute('href') . '?np=y';
|
||||
$item['title'] = $article->find('.pod-listing-header>a', 0)->plaintext;
|
||||
$item['author'] = $this->extractArticleName($article);
|
||||
$item['timestamp'] = $this->extractArticleTimestamp($article);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
foreach($html->find('article') as $recentArticle) {
|
||||
$item = [];
|
||||
$item['uri'] = $recentArticle->find('a', 0)->getAttribute('href');
|
||||
$item['title'] = $recentArticle->find('h2', 0)->plaintext;
|
||||
$item['author'] = $recentArticle->find('p > span', 0)->plaintext;
|
||||
$publicationDateString = trim($recentArticle->find('p > span', 1)->plaintext);
|
||||
$publicationDate = DateTimeImmutable::createFromFormat('F d, Y', $publicationDateString);
|
||||
if ($publicationDate) {
|
||||
$item['timestamp'] = $publicationDate->getTimestamp();
|
||||
}
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
public function getIcon(): string {
|
||||
return 'https://cdn.elsevier.io/verona/includes/favicons/favicon-32x32.png';
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue