<?php
/**
 * Bridge returning the newest full posts of a WordPress powered website.
 *
 * The site's Atom feed is read through the FeedExpander parent class; every
 * entry is then expanded by fetching the linked page and replacing the item
 * content with the article markup found on that page.
 */
class WordPressBridge extends FeedExpander {

    const MAINTAINER = 'aledeg';
    const NAME = 'Wordpress Bridge';
    const URI = 'https://wordpress.org/';
    const CACHE_TIMEOUT = 10800; // 3h
    const DESCRIPTION = 'Returns the newest full posts of a Wordpress powered website';

    const PARAMETERS = array( array(
        'url' => array(
            'name' => 'Blog URL',
            'required' => true
        )
    ));

    /**
     * Strips unwanted markup (scripts, the "wpa" div, forms) from article HTML.
     *
     * @param string $content Raw inner HTML of the article element.
     * @return string The HTML with the unwanted fragments removed.
     */
    private function clearContent($content){
        $patterns = array(
            // Lazy match with the "s" flag so that script bodies containing
            // "<" or line breaks are removed as well.
            '/<script\b[^>]*>.*?<\/script>/is',
            // Removes from the opening "wpa" div up to the end of that line.
            // NOTE(review): "." does not cross line breaks here; kept as in
            // the original because the intended extent is not evident.
            '/<div class="wpa".*/',
            // Lazy match so that content located between two separate forms
            // is preserved; "s" lets a form span several lines.
            '/<form\b.*?<\/form>/is',
        );

        foreach($patterns as $pattern) {
            $cleaned = preg_replace($pattern, '', $content);

            // preg_replace() returns null on a PCRE error (e.g. backtrack
            // limit); keep the previous content rather than losing it all.
            if(!is_null($cleaned)) {
                $content = $cleaned;
            }
        }

        return $content;
    }

    /**
     * Parses one feed entry and replaces its content with the full article.
     *
     * The linked page is searched for the first matching container among
     * 'article', '.single-content', '.post-content' and '.post' (in that
     * order). When none matches, the item produced by the parent is returned
     * unchanged.
     *
     * @param mixed $newItem Feed entry, forwarded as-is to the parent parser.
     * @return array The parsed item, with 'content' replaced when possible.
     */
    protected function parseItem($newItem){
        $item = parent::parseItem($newItem);

        // Without a link there is no page to expand the item from.
        if(empty($item['uri'])) {
            return $item;
        }

        $article_html = getSimpleHTMLDOMCached($item['uri']);

        // NOTE(review): assumes the helper may hand back a non-object when
        // the page cannot be loaded or parsed - confirm against the helper.
        if(!is_object($article_html)) {
            return $item;
        }

        $selectors = array(
            'article',         // most common content div
            '.single-content', // another common content div
            '.post-content',   // another common content div
            '.post',           // for old WordPress themes without HTML5
        );

        foreach($selectors as $selector) {
            // Query each selector only once and stop at the first hit.
            $article = $article_html->find($selector, 0);

            if(!is_null($article)) {
                $item['content'] = $this->clearContent($article->innertext);
                break;
            }
        }

        return $item;
    }

    /**
     * Returns the blog URL given as input, or the parent's URI when no URL
     * has been provided.
     *
     * @return string
     */
    public function getURI(){
        $url = $this->getInput('url');

        if(empty($url)) {
            $url = parent::getURI();
        }

        return $url;
    }

    /**
     * Collects the items from the blog's Atom feed.
     *
     * The '/feed/atom/' address is tried first; when that request raises an
     * HttpException, the '/?feed=atom' query form is used instead.
     */
    public function collectData(){
        $url = $this->getInput('url');

        // just in case someone find a way to access local files by playing with the url
        // An explicit scheme match is used so that values merely starting
        // with the letters "http" are rejected too.
        if($url && preg_match('#^https?://#i', $url) !== 1) {
            returnClientError('The url parameter must either refer to http or https protocol.');
        }

        // Drop trailing slashes so the feed path is not appended as "//feed/atom/".
        $base = rtrim($this->getURI(), '/');

        try {
            $this->collectExpandableDatas($base . '/feed/atom/');
        } catch (HttpException $e) {
            $this->collectExpandableDatas($base . '/?feed=atom');
        }
    }
}