rss-bridge/bridges/SexactuBridge.php

76 lines
3.2 KiB
PHP
Raw Normal View History

2014-02-04 20:54:18 +04:00
<?php
/**
 * Bridge exposing the "Sexactu" column of gqmagazine.fr as a feed.
 *
 * The listing page is scraped with Simple HTML DOM; every list entry that
 * carries a title block becomes one feed item.
 */
class SexactuBridge extends BridgeAbstract{

    const MAINTAINER = "Riduidel";
    const NAME = "Sexactu";
    const URI = "https://www.gqmagazine.fr";
    const CACHE_TIMEOUT = 7200; // 2h
    const DESCRIPTION = "Sexactu via rss-bridge";

    /**
     * Downloads the listing page and appends one entry to $this->items for
     * each article found in the first list of every '.content-holder'.
     */
    public function collectData(){
        $html = getSimpleHTMLDOM($this->getURI())
            or returnServerError('Could not request '.$this->getURI());

        foreach($html->find('.content-holder') as $contentHolder) {
            // only use first list as second one only contains pages numbers
            $articles = $contentHolder->find('ul', 0);
            if(!is_object($articles)) {
                // holder without any list: nothing to collect here
                continue;
            }

            foreach($articles->find('li') as $element) {
                // find() may hand back non-object entries, hence the is_object test;
                // see stackoverflow for more details : http://stackoverflow.com/a/10828479/15619
                if(!is_object($element)) {
                    continue;
                }

                $item = $this->buildItem($element);
                if($item !== null) {
                    $this->items[] = $item;
                }
            }
        }
    }

    /**
     * Returns the address of the listing page that gets scraped.
     *
     * @return string
     */
    public function getURI(){
        return self::URI.'/sexactu';
    }

    /**
     * Builds a feed item from one list entry.
     *
     * @param object $element the 'li' node of an article
     * @return array|null the item, or null when the entry has no usable title link
     */
    private function buildItem($element) {
        $titleBlock = $element->find('.title-holder', 0);
        if(!is_object($titleBlock)) {
            return null;
        }

        $titleDetails = $titleBlock->find('.article-title', 0);
        if(!is_object($titleDetails)) {
            return null;
        }

        $heading = $titleDetails->find('h2', 0);
        $titleData = is_object($heading) ? $heading->find('a', 0) : null;
        if(!is_object($titleData)) {
            // without the link there is neither a title nor a URI to publish
            return null;
        }

        $item = array();
        $item['title'] = $this->correctCase(trim($titleData->innertext));
        $item['uri'] = self::URI.$titleData->href;

        // same outcome as a failed strtotime() when the date node is absent
        $titleTimestamp = $titleDetails->find('h4', 0);
        $item['timestamp'] = is_object($titleTimestamp)
            ? $this->parseFrenchDate($titleTimestamp->innertext)
            : false;

        $item['author'] = "Maïa Mazaurette";

        $elementText = $element->find('.text-container', 0);
        if(is_object($elementText)) {
            // don't forget to replace images server url with gq one
            foreach($elementText->find('img') as $image) {
                $image->src = self::URI.$image->src;
            }
            $item['content'] = $elementText->innertext;
        } else {
            $item['content'] = '';
        }

        return $item;
    }

    /**
     * Converts a French date label into a Unix timestamp.
     *
     * Fugly date parsing due to the fact my DNS-323 doesn't support php intl
     * extension: French month names are swapped for English ones so that
     * strtotime() can read the result.
     *
     * @param string $dateText raw label; anything up to and including the first comma is dropped
     * @return int|false the timestamp, or false when strtotime() cannot parse the text
     */
    private function parseFrenchDate($dateText) {
        // Both lists must stay index-aligned: str_replace() pairs them by position.
        $french = array(
            'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
            'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'
        );
        $english = array(
            'January', 'February', 'March', 'April', 'May', 'June',
            'July', 'August', 'September', 'October', 'November', 'December'
        );

        // Only cut when a comma really exists; strpos() === false must not
        // be turned into offset 1.
        $commaPosition = strpos($dateText, ',');
        if($commaPosition !== false) {
            $dateText = substr($dateText, $commaPosition + 1);
        }

        // Multibyte-aware lowercasing so accented month names match the table.
        $dateText = str_replace($french, $english, mb_strtolower($dateText, 'UTF-8'));

        return strtotime(trim($dateText));
    }

    /**
     * Rewrites a title in sentence case: everything lowercased, then the
     * first letter of each '.'-separated sentence uppercased.
     *
     * @param string $str UTF-8 text
     * @return string the re-cased text, sentences joined with '. '
     */
    private function correctCase($str) {
        $sentences = explode('.', mb_strtolower($str, 'UTF-8'));
        $recased = array();

        foreach($sentences as $sentence) {
            $sentence = trim($sentence);
            // ucfirst() only handles single-byte letters; do it by character instead
            $firstLetter = mb_strtoupper(mb_substr($sentence, 0, 1, 'UTF-8'), 'UTF-8');
            $remainder = mb_substr($sentence, 1, mb_strlen($sentence, 'UTF-8'), 'UTF-8');
            $recased[] = $firstLetter.$remainder;
        }

        // a title ending with '.' yields an empty last sentence; drop the
        // dangling space the join leaves behind
        return rtrim(implode('. ', $recased));
    }
}