mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-04 07:45:55 +03:00
56 lines
1.5 KiB
PHP
56 lines
1.5 KiB
PHP
<?php
|
|
|
|
/**
 * Bridge that turns the Harvard Health blog index into feed items.
 *
 * The index page at self::URI is scraped for article cards; every article
 * linked from a card is then fetched so the full article body can be used as
 * the item content.
 */
class HarvardHealthBlogBridge extends BridgeAbstract
{
    const NAME = 'Harvard Health Blog';
    const URI = 'https://www.health.harvard.edu/blog';
    const DESCRIPTION = 'Retrieve articles from health.harvard.edu';
    const MAINTAINER = 'tillcash';

    // Upper bound on the number of items collected per run; each item costs
    // one additional page fetch in constructContent().
    const MAX_ARTICLES = 10;

    /**
     * Collect up to MAX_ARTICLES items from the blog index page.
     *
     * Each article card contributes one entry to $this->items with the keys
     * 'content', 'title', 'uid' and 'uri'. 'timestamp' is added only when the
     * card carries a <time> element with a non-empty datetime attribute.
     * Cards without the expected headline link, or whose link has no href,
     * are skipped and do not count towards the limit.
     */
    public function collectData()
    {
        $dom = getSimpleHTMLDOM(self::URI);
        $count = 0;

        foreach ($dom->find('div[class="mb-16 md:flex"]') as $element) {
            if ($count >= self::MAX_ARTICLES) {
                break;
            }

            $link = $element->find('a[class="hover:text-red transition-colors duration-200"]', 0);
            if (!$link) {
                continue;
            }

            // An anchor without a usable href cannot be fetched or used as a
            // unique id, so skip it before doing any network work.
            $url = $link->href;
            if (!$url) {
                continue;
            }

            $item = [
                'content' => $this->constructContent($url),
                'title' => $link->plaintext,
                'uid' => $url,
                'uri' => $url,
            ];

            // The <time> element is optional from this code's point of view:
            // look it up first instead of dereferencing a possibly-null result.
            $time = $element->find('time', 0);
            if ($time && $time->datetime) {
                $item['timestamp'] = $time->datetime;
            }

            $this->items[] = $item;
            $count++;
        }
    }

    /**
     * Fetch an article page and return the HTML of its main content block.
     *
     * @param string $url Address of the article page, taken from the index card link.
     *
     * @return string Inner HTML of the first element whose class contains
     *                "content-repository-content", with ".inline-ad" elements
     *                blanked out; the literal 'Content Not Found' when no
     *                such element exists on the page.
     */
    private function constructContent(string $url): string
    {
        $dom = getSimpleHTMLDOMCached($url);

        $article = $dom->find('div[class*="content-repository-content"]', 0);
        if (!$article) {
            return 'Content Not Found';
        }

        // Remove ads
        foreach ($article->find('.inline-ad') as $ad) {
            $ad->outertext = '';
        }

        return $article->innertext;
    }
}
|