mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 17:45:40 +03:00
[ElsevierBridge] fix: broken bridge (#2575)
This commit is contained in:
parent
3e363bbc20
commit
25e9f69261
1 changed file with 18 additions and 55 deletions
|
@@ -1,7 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
class ElsevierBridge extends BridgeAbstract {
|
class ElsevierBridge extends BridgeAbstract {
|
||||||
|
|
||||||
const MAINTAINER = 'Pierre Mazière';
|
const MAINTAINER = 'dvikan';
|
||||||
const NAME = 'Elsevier journals recent articles';
|
const NAME = 'Elsevier journals recent articles';
|
||||||
const URI = 'https://www.journals.elsevier.com/';
|
const URI = 'https://www.journals.elsevier.com/';
|
||||||
const CACHE_TIMEOUT = 43200; //12h
|
const CACHE_TIMEOUT = 43200; //12h
|
||||||
|
@@ -16,63 +16,26 @@ class ElsevierBridge extends BridgeAbstract {
|
||||||
)
|
)
|
||||||
));
|
));
|
||||||
|
|
||||||
// Extracts the list of names from an article as string
|
|
||||||
private function extractArticleName($article){
|
|
||||||
$names = $article->find('small', 0);
|
|
||||||
if($names)
|
|
||||||
return trim($names->plaintext);
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extracts the timestamp from an article
|
|
||||||
private function extractArticleTimestamp($article){
|
|
||||||
$time = $article->find('.article-info', 0);
|
|
||||||
if($time) {
|
|
||||||
$timestring = trim($time->plaintext);
|
|
||||||
/*
|
|
||||||
The format depends on the age of an article:
|
|
||||||
- Available online 29 July 2016
|
|
||||||
- July 2016
|
|
||||||
- May–June 2016
|
|
||||||
*/
|
|
||||||
if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)) {
|
|
||||||
return strtotime($matches[0]);
|
|
||||||
} elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) {
|
|
||||||
return strtotime($matches[0]);
|
|
||||||
} elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) {
|
|
||||||
return strtotime($matches[0]);
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extracts the content from an article
|
|
||||||
private function extractArticleContent($article){
|
|
||||||
$content = $article->find('.article-content', 0);
|
|
||||||
if($content) {
|
|
||||||
return trim($content->plaintext);
|
|
||||||
}
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getIcon() {
|
|
||||||
return 'https://cdn.elsevier.io/verona/includes/favicons/favicon-32x32.png';
|
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
$uri = self::URI . $this->getInput('j') . '/recent-articles/';
|
// Not all journals have the /recent-articles page
|
||||||
$html = getSimpleHTMLDOM($uri);
|
$url = sprintf('https://www.journals.elsevier.com/%s/recent-articles/', $this->getInput('j'));
|
||||||
|
$html = getSimpleHTMLDOM($url);
|
||||||
|
|
||||||
foreach($html->find('.pod-listing') as $article) {
|
foreach($html->find('article') as $recentArticle) {
|
||||||
$item = array();
|
$item = [];
|
||||||
$item['uri'] = $article->find('.pod-listing-header>a', 0)->getAttribute('href') . '?np=y';
|
$item['uri'] = $recentArticle->find('a', 0)->getAttribute('href');
|
||||||
$item['title'] = $article->find('.pod-listing-header>a', 0)->plaintext;
|
$item['title'] = $recentArticle->find('h2', 0)->plaintext;
|
||||||
$item['author'] = $this->extractArticleName($article);
|
$item['author'] = $recentArticle->find('p > span', 0)->plaintext;
|
||||||
$item['timestamp'] = $this->extractArticleTimestamp($article);
|
$publicationDateString = trim($recentArticle->find('p > span', 1)->plaintext);
|
||||||
$item['content'] = $this->extractArticleContent($article);
|
$publicationDate = DateTimeImmutable::createFromFormat('F d, Y', $publicationDateString);
|
||||||
|
if ($publicationDate) {
|
||||||
|
$item['timestamp'] = $publicationDate->getTimestamp();
|
||||||
|
}
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function getIcon(): string {
|
||||||
|
return 'https://cdn.elsevier.io/verona/includes/favicons/favicon-32x32.png';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue