rss-bridge/bridges/WordPressBridge.php
logmanoriginal 0a008c4eb3 [WordPress] Support atom AND rss for feed replies
Some sites return RSS feeds instead of ATOM feeds even though an ATOM
feed was specifically requested (/feed/atom)!

This bridge will now detect ATOM/RSS feeds and change behavior
accordingly.
2016-08-04 20:06:53 +02:00

136 lines
4.4 KiB
PHP

<?php
// Feed flavours the bridge can receive from a blog.
const WORDPRESS_TYPE_ATOM = 1; // feed content is ATOM
const WORDPRESS_TYPE_RSS = 2;  // feed content is RSS
/**
 * Bridge that turns the feed of a WordPress blog into full-content items.
 *
 * The blog's '/feed/atom' endpoint is requested; the response is inspected to
 * decide whether it is really ATOM or RSS, and for each of the newest posts
 * the linked article page is downloaded to extract the full post body.
 */
class WordPressBridge extends BridgeAbstract {

    /** Blog URL taken from the request; collectData() rewrites it to the feed URL. */
    private $url;

    /**
     * Declares the bridge's descriptive metadata and its single 'url' parameter.
     */
    public function loadMetadatas() {
        $this->maintainer = "aledeg";
        $this->name = "Wordpress Bridge";
        $this->uri = "https://wordpress.org/";
        $this->description = "Returns the 3 newest full posts of a Wordpress blog";
        $this->update = "2016-08-04";

        // The parameter definition is a JSON string; its bytes are kept as-is.
        $this->parameters[] =
'[
{
"name" : "blog URL",
"required" : "true",
"identifier" : "url"
}
]';
    }

    /**
     * Returns the content type for a given html dom.
     *
     * @param object $html Parsed feed document (must offer find()).
     * @return int WORDPRESS_TYPE_ATOM or WORDPRESS_TYPE_RSS; ATOM when undecidable.
     */
    public function DetectContentType($html) {
        if ($html->find('entry')) {
            return WORDPRESS_TYPE_ATOM;
        }

        if ($html->find('item')) {
            return WORDPRESS_TYPE_RSS;
        }

        return WORDPRESS_TYPE_ATOM; // Make ATOM default
    }

    /**
     * Renames all 'link' tags of an element to 'url' and re-parses the result.
     *
     * The existing code comments state that simplehtmldom cannot parse 'link'
     * tags properly, hence the rename. Both the bare RSS form
     * ('<link>text</link>') and the attribute-carrying ATOM form
     * ('<link rel="..." href="..." />') are renamed, so that find('url', 0)
     * works for either feed type.
     *
     * @param object $element Feed entry element (must offer ->outertext).
     * @return mixed Whatever str_get_html() returns for the rewritten markup.
     */
    public function ReplaceLinkTagsWithUrlTags($element) {
        $markup = str_replace(
            array('<link>', '<link ', '</link>'),
            array('<url>', '<url ', '</url>'),
            $element->outertext
        );

        return str_get_html($markup);
    }

    /**
     * Collects up to three posts from the blog feed, each with its full content.
     *
     * @param array $param Request parameters; 'url' holds the blog URL.
     */
    public function collectData(array $param) {
        $this->processParams($param);

        if (!$this->hasUrl()) {
            $this->returnError('You must specify a URL', 400);
            return; // only reached if returnError() returns instead of aborting
        }

        // Trim trailing slashes so 'http://blog/' does not become 'http://blog//feed/atom'.
        $this->url = rtrim($this->url, '/') . '/feed/atom';

        $html = $this->file_get_html($this->url);
        if (!$html) {
            $this->returnError("Could not request {$this->url}.", 404);
            return; // only reached if returnError() returns instead of aborting
        }

        // Notice: We requested an ATOM feed, however some sites return RSS feeds instead!
        $type = $this->DetectContentType($html);
        $isRss = ($type === WORDPRESS_TYPE_RSS);
        $posts = $html->find($isRss ? 'item' : 'entry');

        if (empty($posts)) {
            $this->returnError("Sorry, {$this->url} doesn't seem to be a Wordpress blog.", 404);
            return; // only reached if returnError() returns instead of aborting
        }

        // Keep the default bridge name when the feed has no title element.
        $feedTitle = $html->find('title', 0);
        if ($feedTitle) {
            $this->name = $feedTitle->plaintext;
        }

        $limit = 3;
        $collected = 0;

        foreach ($posts as $post) {
            if ($collected >= $limit) {
                break; // no need to walk the remaining feed entries
            }

            $article = $this->ReplaceLinkTagsWithUrlTags($post);
            if (!$article) {
                continue; // entry could not be re-parsed
            }

            // Create the item explicitly: assigning a property on an undefined
            // array element relied on implicit stdClass creation, which is an
            // Error on PHP 8.
            // NOTE(review): the framework may offer a dedicated item class;
            // stdClass is exactly what PHP created implicitly before - confirm.
            $item = new \stdClass();

            if ($isRss) {
                $item->uri = trim($this->innerTextOf($article, 'url')); // 'link' => 'url'!
                $item->title = $this->stripCDATA($this->plainTextOf($article, 'title'));
                $item->author = trim($this->stripCDATA($this->innerTextOf($article, 'dc:creator')));
                $item->timestamp = strtotime($this->innerTextOf($article, 'pubDate'));
            } else {
                $link = $article->find('url', 0); // 'link' => 'url'!
                $item->uri = $link ? trim((string) $link->getAttribute('href')) : '';
                $item->title = $this->stripCDATA($this->plainTextOf($article, 'title'));
                $item->author = trim($this->stripCDATA($this->innerTextOf($article, 'author')));
                $item->timestamp = strtotime($this->innerTextOf($article, 'updated'));
            }

            if ($item->uri === '') {
                continue; // without a link there is no article page to fetch
            }

            $item->content = $this->fetchArticleContent($item->uri);

            $this->items[] = $item;
            $collected++;
        }
    }

    /**
     * @return string Feed title (or default bridge name) followed by the bridge suffix.
     */
    public function getName() {
        return "{$this->name} - Wordpress Bridge";
    }

    /**
     * @return string|null The URL currently stored in $this->url.
     */
    public function getURI() {
        return $this->url;
    }

    /**
     * @return int Cache lifetime in seconds.
     */
    public function getCacheDuration() {
        return 3600 * 3; // 3 hours
    }

    /**
     * Downloads an article page and extracts the post body.
     *
     * Selectors are tried in order: 'article', then '.single-content' (another
     * common content div), then '.post' (old WordPress themes without HTML5).
     *
     * @param string $uri Article URL.
     * @return string Cleaned content, or '' when the page or content is unavailable.
     */
    private function fetchArticleContent($uri) {
        $page = $this->file_get_html($uri);
        if (!$page) {
            return '';
        }

        foreach (array('article', '.single-content', '.post') as $selector) {
            $node = $page->find($selector, 0);
            if (!$node) {
                continue;
            }

            $content = $this->clearContent($node->innertext);
            if (!empty($content)) {
                return $content;
            }
        }

        return '';
    }

    /**
     * Removes CDATA start/end markers from a string, keeping the wrapped text.
     *
     * @param string $string
     * @return string
     */
    private function stripCDATA($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    }

    /**
     * Strips script blocks and everything from a '<div class="wpa"' marker to
     * the end of its line.
     *
     * The script pattern is lazy and uses the 's' modifier so that each script
     * block is removed on its own (also when it spans several lines) instead
     * of deleting everything between the first and the last script on a line.
     *
     * @param string $content
     * @return string
     */
    private function clearContent($content) {
        $patterns = array(
            '/<script.*?\/script>/is',
            '/<div class="wpa".*/',
        );

        foreach ($patterns as $pattern) {
            $cleaned = preg_replace($pattern, '', $content);
            // preg_replace() yields null on a PCRE error; keep the text in that case.
            if ($cleaned !== null) {
                $content = $cleaned;
            }
        }

        return $content;
    }

    /**
     * @param object $dom      Parsed document or element (must offer find()).
     * @param string $selector Selector of the wanted element.
     * @return string innertext of the first match, '' when nothing matches.
     */
    private function innerTextOf($dom, $selector) {
        $node = $dom->find($selector, 0);

        return $node ? (string) $node->innertext : '';
    }

    /**
     * @param object $dom      Parsed document or element (must offer find()).
     * @param string $selector Selector of the wanted element.
     * @return string plaintext of the first match, '' when nothing matches.
     */
    private function plainTextOf($dom, $selector) {
        $node = $dom->find($selector, 0);

        return $node ? (string) $node->plaintext : '';
    }

    /**
     * @return bool True when a non-empty URL has been provided.
     */
    private function hasUrl() {
        return !empty($this->url);
    }

    /**
     * Copies the 'url' request parameter into $this->url (null when absent).
     *
     * @param array $param Request parameters.
     */
    private function processParams($param) {
        $this->url = isset($param['url']) ? $param['url'] : null;
    }
}