<?php
// A Gawker feed address is assembled as RSS_PREFIX . <site id> . RSS_SUFFIX.
// Constant names and values must not carry surrounding whitespace: the names
// are referenced as bare identifiers and the values are concatenated into a URL.
define("RSS_PREFIX", "http://feeds.gawker.com/");
define("RSS_SUFFIX", "/full");

/**
 * Bridge for the Gawker media RSS feeds.
 *
 * The feed for a given site id is fetched from RSS_PREFIX . <site id> . RSS_SUFFIX,
 * and each feed entry is then expanded by loading the linked article page and
 * extracting its '.post-content' element (and the author from '.vcard', if any).
 */
class GawkerBridge extends RssExpander {

    /**
     * Declares the bridge metadata and its single required parameter, 'site'.
     */
    public function loadMetadatas() {
        $this->maintainer = " mitsukarenai ";
        $this->name = " Gawker media ";
        $this->uri = "http://feeds.gawker.com/";
        $this->description = " A bridge allowing access to any of the numerous Gawker media blogs (Lifehacker, deadspin, Kotaku, Jezebel, and so on. Notice you have to give its id to find the RSS stream in gawker maze ";

        $this->parameters[] = array(
            'site' => array(
                'name' => 'site id to put in uri between feeds.gawker.com and /full .. which is obviously not full AT ALL',
                'required' => true
            )
        );
    }

    /**
     * Builds the feed URL for a site id.
     *
     * @param string $name site id, inserted between RSS_PREFIX and RSS_SUFFIX
     * @return string the concatenated feed URL
     */
    private function toURI($name) {
        return RSS_PREFIX . $name . RSS_SUFFIX;
    }

    /**
     * Reads the 'site' parameter of the queried context and expands the
     * matching feed through collectExpandableDatas().
     *
     * Raises an E_USER_ERROR when no site id was supplied.
     */
    public function collectData() {
        $param = $this->parameters[$this->queriedContext];

        if (empty($param['site']['value'])) {
            trigger_error(" If no site is provided, nothing is gonna happen ", E_USER_ERROR);
            // Only reached when an error handler lets execution continue;
            // there is no feed URL to load, so stop instead of using an
            // undefined $url below.
            return;
        }

        $site = $param['site']['value'];
        $this->name = $site;
        $url = $this->toURI(strtolower($site));

        // Log the feed URL that is actually about to be fetched.
        $this->debugMessage(" loading feed from " . $url);
        parent::collectExpandableDatas($url);
    }

    /**
     * Converts one RSS entry into an item array, filling 'content' and
     * 'author' from the linked article page.
     *
     * On any failure while loading or parsing the article page, the page is
     * removed from the cache and the exception message becomes the content.
     *
     * @param object $newsItem feed entry; its ->link and ->title are read here
     * @return array item with 'uri', 'title', 'timestamp', 'content' and,
     *               when an author link is found, 'author'
     */
    protected function parseRSSItem($newsItem) {
        $item = array();
        $item['uri'] = trim($newsItem->link);
        $item['title'] = trim($newsItem->title);
        $item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem);
        $this->debugMessage(" /////////////////////////////////////////////////////////////////////////////////////// \n processing item " . var_export($item, true) . " \n \n \n built from \n \n \n " . var_export($newsItem, true));

        try {
            // now load that uri from cache
            $this->debugMessage(" loading page " . $item['uri']);
            $articlePage = str_get_html($this->get_cached($item['uri']));
            if (!is_object($articlePage)) {
                throw new Exception(" cache content for " . $item['uri'] . " is NOT a Simple DOM parser object ! ");
            }

            $content = $articlePage->find('.post-content', 0);
            if (!is_object($content)) {
                // Without this guard a page lacking the expected markup would
                // be dereferenced as null; route it to the error path instead.
                throw new Exception("no .post-content element found in " . $item['uri']);
            }
            HTMLSanitizer::defaultImageSrcTo($content, $this->getURI());

            $vcard = $articlePage->find('.vcard', 0);
            if (is_object($vcard)) {
                $authorLink = $vcard->find('a', 0);
                // The vcard may not contain a link; leave 'author' unset then.
                if (is_object($authorLink)) {
                    $item['author'] = $authorLink->innertext;
                }
                // TODO use author link href to fill the feed info
            }

            $this->debugMessage(" item quite loaded : " . var_export($item, true));
            // I set item content as last element, for easier var_export reading
            $item['content'] = $content->innertext;
        } catch (Exception $e) {
            $this->debugMessage(" obtaining " . $item['uri'] . " resulted in exception " . $e->getMessage() . " . Deleting cached page ... ");
            // maybe file is incorrect. it should be discarded from cache
            // (the key is 'uri'; there is no 'url' entry in $item)
            $this->remove_from_cache($item['uri']);
            $item['content'] = $e->getMessage();
        }

        return $item;
    }
}