From 25e9f692614dfbfabbabec0dfe7015ca6c27ac4b Mon Sep 17 00:00:00 2001 From: dag Date: Thu, 31 Mar 2022 09:49:30 +0200 Subject: [PATCH] [ElsevierBridge] fix: broken bridge (#2575) --- bridges/ElsevierBridge.php | 73 ++++++++++---------------------------- 1 file changed, 18 insertions(+), 55 deletions(-) diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index 08ed4c51..5ab19a1a 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -1,7 +1,7 @@ find('small', 0); - if($names) - return trim($names->plaintext); - return ''; - } - - // Extracts the timestamp from an article - private function extractArticleTimestamp($article){ - $time = $article->find('.article-info', 0); - if($time) { - $timestring = trim($time->plaintext); - /* - The format depends on the age of an article: - - Available online 29 July 2016 - - July 2016 - - May–June 2016 - */ - if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)) { - return strtotime($matches[0]); - } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) { - return strtotime($matches[0]); - } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) { - return strtotime($matches[0]); - } else { - return 0; - } - } - return 0; - } - - // Extracts the content from an article - private function extractArticleContent($article){ - $content = $article->find('.article-content', 0); - if($content) { - return trim($content->plaintext); - } - return ''; - } - - public function getIcon() { - return 'https://cdn.elsevier.io/verona/includes/favicons/favicon-32x32.png'; - } - public function collectData(){ - $uri = self::URI . $this->getInput('j') . '/recent-articles/'; - $html = getSimpleHTMLDOM($uri); + // Not all journals have the /recent-articles page + $url = sprintf('https://www.journals.elsevier.com/%s/recent-articles/', $this->getInput('j')); + $html = getSimpleHTMLDOM($url); - foreach($html->find('.pod-listing') as $article) { - $item = array(); - $item['uri'] = $article->find('.pod-listing-header>a', 0)->getAttribute('href') . '?np=y'; - $item['title'] = $article->find('.pod-listing-header>a', 0)->plaintext; - $item['author'] = $this->extractArticleName($article); - $item['timestamp'] = $this->extractArticleTimestamp($article); - $item['content'] = $this->extractArticleContent($article); + foreach($html->find('article') as $recentArticle) { + $item = []; + $item['uri'] = $recentArticle->find('a', 0)->getAttribute('href'); + $item['title'] = $recentArticle->find('h2', 0)->plaintext; + $item['author'] = $recentArticle->find('p > span', 0)->plaintext; + $publicationDateString = trim($recentArticle->find('p > span', 1)->plaintext); + $publicationDate = DateTimeImmutable::createFromFormat('F d, Y', $publicationDateString); + if ($publicationDate) { + $item['timestamp'] = $publicationDate->getTimestamp(); + } $this->items[] = $item; } } + + public function getIcon(): string { + return 'https://cdn.elsevier.io/verona/includes/favicons/favicon-32x32.png'; + } }