mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-29 06:38:51 +03:00
[PcGamerBridge] Use meta tags to generate feed contents (#2271)
This commit is contained in:
parent
8d0fc54e4d
commit
8f98e07979
1 changed files with 18 additions and 28 deletions
|
@ -2,43 +2,33 @@
|
||||||
class PcGamerBridge extends BridgeAbstract
|
class PcGamerBridge extends BridgeAbstract
|
||||||
{
|
{
|
||||||
const NAME = 'PC Gamer';
|
const NAME = 'PC Gamer';
|
||||||
const URI = 'https://www.pcgamer.com/archive/';
|
const URI = 'https://www.pcgamer.com/';
|
||||||
const DESCRIPTION = 'PC Gamer Most Read Stories';
|
const DESCRIPTION = 'PC Gamer is your source for exclusive reviews, demos,
|
||||||
const CACHE_TIMEOUT = 3600;
|
updates and news on all your favorite PC gaming franchises.';
|
||||||
const MAINTAINER = 'IceWreck, mdemoss';
|
const MAINTAINER = 'IceWreck, mdemoss';
|
||||||
|
|
||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
$html = getSimpleHTMLDOMCached($this->getURI(), 300);
|
$html = getSimpleHTMLDOMCached($this->getURI(), 300);
|
||||||
$stories = $html->find('ul.basic-list li.day-article');
|
$stories = $html->find('a.article-link');
|
||||||
$i = 0;
|
|
||||||
// Find individual stories in the archive page
|
|
||||||
foreach ($stories as $element) {
|
foreach ($stories as $element) {
|
||||||
if($i == 15) break;
|
$item = array();
|
||||||
$item['uri'] = $element->find('a', 0)->href;
|
$item['uri'] = $element->href;
|
||||||
// error_log(print_r($item['uri'], TRUE));
|
|
||||||
$articleHtml = getSimpleHTMLDOMCached($item['uri']);
|
$articleHtml = getSimpleHTMLDOMCached($item['uri']);
|
||||||
$item['title'] = $element->find('a', 0)->plaintext;
|
|
||||||
|
// Relying on meta tags ought to be more reliable.
|
||||||
|
$item['title'] = $articleHtml->find('meta[name=parsely-title]', 0)->content;
|
||||||
|
$item['content'] = html_entity_decode($articleHtml->find('meta[name=description]', 0)->content);
|
||||||
|
$item['author'] = $articleHtml->find('meta[name=parsely-author]', 0)->content;
|
||||||
|
$item['enclosures'][] = $articleHtml->find('meta[name=parsely-image-url]', 0)->content;
|
||||||
|
/* I don't know why every article has two extra tags, but because
|
||||||
|
one matches another common tag, "guide," it needs to be removed. */
|
||||||
|
$item['categories'] = array_diff(
|
||||||
|
explode(',', $articleHtml->find('meta[name=parsely-tags]', 0)->content),
|
||||||
|
array('van_buying_guide_progressive', 'serversidehawk')
|
||||||
|
);
|
||||||
$item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content);
|
$item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content);
|
||||||
$item['author'] = $articleHtml->find('span.by-author a', 0)->plaintext;
|
|
||||||
|
|
||||||
// Get the article content
|
|
||||||
$articleContents = $articleHtml->find('#article-body', 0);
|
|
||||||
|
|
||||||
/*
|
|
||||||
By default the img src has a link to an error image and then the actual image
|
|
||||||
is added in by JS. So we replace the error image with the actual full size image
|
|
||||||
whose link is in one of the attributes of the img tag
|
|
||||||
*/
|
|
||||||
foreach($articleContents->find('img') as $img) {
|
|
||||||
$imgsrc = $img->getAttribute('data-original-mos');
|
|
||||||
// error_log($imgsrc);
|
|
||||||
$img->src = $imgsrc;
|
|
||||||
}
|
|
||||||
|
|
||||||
$item['content'] = $articleContents;
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
$i++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue