rss-bridge/bridges/PcGamerBridge.php

35 lines
1.4 KiB
PHP
Raw Normal View History

<?php
/**
 * Bridge that turns the PC Gamer front page into feed items.
 *
 * The front page is only used to discover article links; each article page
 * is then loaded and its <meta> tags are read to fill in the item fields.
 */
class PcGamerBridge extends BridgeAbstract
{
    const NAME = 'PC Gamer';
    const URI = 'https://www.pcgamer.com/';
    // The line break inside this literal is part of the constant's value and is kept as-is.
    const DESCRIPTION = 'PC Gamer is your source for exclusive reviews, demos,
updates and news on all your favorite PC gaming franchises.';
    const MAINTAINER = 'IceWreck, mdemoss';

    /**
     * Collects one item per distinct article linked from the front page.
     *
     * Articles whose page cannot be loaded are skipped, and any meta tag that
     * is absent simply leaves the corresponding field empty instead of
     * triggering a null dereference.
     *
     * @return void Items are appended to $this->items.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOMCached($this->getURI(), 300);
        $stories = $html->find('a.article-link');

        // Tracks article URIs already handled so a story linked several times
        // on the front page is fetched and emitted only once.
        $seen = array();

        foreach ($stories as $element) {
            $uri = $element->href;
            if (!is_string($uri) || $uri === '' || isset($seen[$uri])) {
                continue;
            }
            $seen[$uri] = true;

            $articleHtml = getSimpleHTMLDOMCached($uri);
            if (!$articleHtml) {
                // NOTE(review): assumes the loader can hand back a falsy value
                // for an unparseable page - confirm against the helper.
                continue;
            }

            $item = array();
            $item['uri'] = $uri;

            // Relying on meta tags ought to be more reliable.
            $item['title'] = $this->readMetaContent($articleHtml, 'parsely-title');
            $item['content'] = html_entity_decode(
                (string)$this->readMetaContent($articleHtml, 'description')
            );
            $item['author'] = $this->readMetaContent($articleHtml, 'parsely-author');

            $item['enclosures'] = array();
            $image = $this->readMetaContent($articleHtml, 'parsely-image-url');
            if ($image !== null && $image !== '') {
                $item['enclosures'][] = $image;
            }

            $item['categories'] = $this->extractCategories(
                $this->readMetaContent($articleHtml, 'parsely-tags')
            );

            // Only set a timestamp when the publication date is present and parseable.
            $published = $this->readMetaContent($articleHtml, 'pub_date');
            $timestamp = $published === null ? false : strtotime($published);
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }

            $this->items[] = $item;
        }
    }

    /**
     * Reads the content attribute of the first <meta name="..."> tag.
     *
     * @param object $document Parsed article page exposing find().
     * @param string $name     Value of the meta tag's name attribute.
     *
     * @return string|null The attribute value, or null when the tag is absent
     *                     or its content attribute is not a string.
     */
    private function readMetaContent($document, $name)
    {
        $meta = $document->find('meta[name=' . $name . ']', 0);
        if (!$meta) {
            return null;
        }

        $content = $meta->content;

        return is_string($content) ? $content : null;
    }

    /**
     * Splits the comma-separated tag list into a clean list of categories.
     *
     * @param string|null $rawTags Raw content of the parsely-tags meta tag.
     *
     * @return array Sequentially indexed list of tag names.
     */
    private function extractCategories($rawTags)
    {
        if ($rawTags === null || $rawTags === '') {
            return array();
        }

        // Trim each tag and drop empty entries so blank segments never become categories.
        $tags = array_filter(array_map('trim', explode(',', $rawTags)), 'strlen');

        /* Per the original author: every article carries these two extra tags,
        and because one matches another common tag, "guide," they are removed. */
        $ignored = array('van_buying_guide_progressive', 'serversidehawk');

        // array_diff preserves keys; reindex so the result is a plain list.
        return array_values(array_diff($tags, $ignored));
    }
}