rss-bridge/bridges/FSecureBlogBridge.php

120 lines
3.3 KiB
PHP
Raw Normal View History

2021-01-29 21:27:35 +03:00
<?php
class FSecureBlogBridge extends BridgeAbstract {
const NAME = 'F-Secure Blog';
const URI = 'https://blog.f-secure.com';
const DESCRIPTION = 'F-Secure Blog';
const MAINTAINER = 'simon816';
const PARAMETERS = array(
'' => array(
'categories' => array(
'name' => 'Blog categories',
'exampleValue' => 'home-security',
),
'language' => array(
'name' => 'Language',
'defaultValue' => 'en',
),
'oldest_date' => array(
'name' => 'Oldest article date',
'exampleValue' => '-2 months',
),
)
);
public function getURI() {
$lang = $this->getInput('language') or 'en';
if ($lang === 'en') {
return self::URI;
}
return self::URI . "/$lang";
}
public function collectData() {
$this->items = array();
$this->seen = array();
$this->oldest = strtotime($this->getInput('oldest_date')) ?: 0;
$categories = $this->getInput('categories');
if (!empty($categories)) {
foreach (explode(',', $categories) as $cat) {
if (!empty($cat)) {
$this->collectCategory($cat);
}
}
return;
}
$html = getSimpleHTMLDOMCached($this->getURI() . '/');
foreach ($html->find('ul.c-header-menu-desktop__list li a') as $link) {
$url = parse_url($link->href);
if (($pos = strpos($url['path'], '/category/')) !== false) {
$cat = substr($url['path'], $pos + strlen('/category/'), -1);
$this->collectCategory($cat);
}
}
}
private function collectCategory($category) {
$url = $this->getURI() . "/category/$category/";
while ($url) {
//Limit total amount of requests
if(count($this->items) >= 20) {
break;
}
2021-01-29 21:27:35 +03:00
$url = $this->collectListing($url);
}
}
// n.b. this relies on articles to be ordered by date so the cutoff works
private function collectListing($url) {
$html = getSimpleHTMLDOMCached($url, 60 * 60);
$items = $html->find('section.b-blog .l-blog__content__listing div.c-listing-item');
$catName = trim($html->find('section.b-blog .c-blog-header__title', 0)->plaintext);
foreach ($items as $item) {
$url = $item->getAttribute('data-url');
if (!$this->collectArticle($url)) {
return null; // Too old, stop collecting
}
}
// Point's to 404 for non-english blog
// $next = $html->find('link[rel=next]', 0);
$next = $html->find('ul.page-numbers a.next', 0);
return $next ? $next->href : null;
}
// Returns a boolean whether to continue collecting articles
// i.e. date is after oldest cutoff
private function collectArticle($url) {
if (array_key_exists($url, $this->seen)) {
return true;
}
$html = getSimpleHTMLDOMCached($url);
$rssItem = array( 'uri' => $url, 'uid' => $url );
$rssItem['title'] = $html->find('meta[property=og:title]', 0)->content;
$dt = $html->find('meta[property=article:published_time]', 0)->content;
// Exit if too old
if (strtotime($dt) < $this->oldest) {
return false;
}
$rssItem['timestamp'] = $dt;
$img = $html->find('meta[property=og:image]', 0);
$rssItem['enclosures'] = $img ? array($img->content) : array();
$rssItem['author'] = trim($html->find('.c-blog-author__text a', 0)->plaintext);
$rssItem['categories'] = array_map(function ($link) {
return trim($link->plaintext);
}, $html->find('.b-single-header__categories .c-category-list a'));
$rssItem['content'] = trim($html->find('article', 0)->innertext);
$this->items[] = $rssItem;
$this->seen[$url] = 1;
return true;
}
}