mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-29 14:48:52 +03:00
Improve Facebook Bridge
Retrieve author display name from page title; build short readable title using name and content; convert relative links into absolute links; remove attributes from </a> tags (may cause issues); remove onclick and onmouseover (JavaScript code); retrieve URL of first image of post as thumbnail; use author avatar as thumbnail if no first image; use display name in feed name: "name - Facebook Bridge"; minor code indent fixes and use single quotes.
This commit is contained in:
parent
762ad0291e
commit
d7436c2d0a
1 changed files with 73 additions and 61 deletions
|
@ -1,87 +1,99 @@
|
||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @name Facebook
|
* @name Facebook
|
||||||
* @homepage http://facebook.com/
|
* @homepage http://facebook.com/
|
||||||
* @description Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117
|
* @description Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117
|
||||||
* @update 03/08/2015
|
* @update 05/09/2015
|
||||||
* @maintainer teromene
|
* @maintainer teromene
|
||||||
* @use1(u="username")
|
* @use1(u="username")
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class FacebookBridge extends BridgeAbstract{
|
class FacebookBridge extends BridgeAbstract{
|
||||||
|
|
||||||
public function collectData(array $param){
|
private $name;
|
||||||
|
|
||||||
$html = '';
|
public function collectData(array $param){
|
||||||
|
|
||||||
if(isset($param['u'])) {
|
$html = '';
|
||||||
if(!strpos($param['u'], "/")) {
|
|
||||||
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
if(isset($param['u'])) {
|
||||||
|
if(!strpos($param['u'], "/")) {
|
||||||
|
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||||
|
} else {
|
||||||
|
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
|
$this->returnError('You must specify a Facebook username.', 400);
|
||||||
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
|
|
||||||
$this->returnError('You must specify a Facebook username.', 400);
|
$element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0);
|
||||||
|
|
||||||
}
|
if(isset($element)) {
|
||||||
|
|
||||||
|
$author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
|
||||||
|
$profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&height=200';
|
||||||
|
$this->name = $author;
|
||||||
|
|
||||||
|
foreach($element->children() as $post) {
|
||||||
|
|
||||||
$element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0);
|
$item = new \Item();
|
||||||
|
|
||||||
if(isset($element)) {
|
if($post->hasAttribute("data-time")) {
|
||||||
|
|
||||||
foreach($element->children() as $post) {
|
//Clean the content of the page and convert relative links into absolute links
|
||||||
|
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
|
||||||
|
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
|
||||||
|
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
|
||||||
|
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);
|
||||||
|
$content = str_replace(' href="/', ' href="https://facebook.com/', $content);
|
||||||
|
$content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content);
|
||||||
|
$content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content);
|
||||||
|
$content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
|
||||||
|
$content = strip_tags($content,'<a><img>');
|
||||||
|
|
||||||
$item = new \Item();
|
//Retrieve date of the post
|
||||||
|
$date = $post->find("abbr")[0];
|
||||||
|
if(isset($date) && $date->hasAttribute('data-utime')) {
|
||||||
|
$date = $date->getAttribute('data-utime');
|
||||||
|
} else {
|
||||||
|
$date = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if($post->hasAttribute("data-time")) {
|
//Build title from username and content
|
||||||
|
$title = $author;
|
||||||
|
if (strlen($title) > 24)
|
||||||
|
$title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...';
|
||||||
|
$title = $title.' | '.strip_tags($content);
|
||||||
|
if (strlen($title) > 64)
|
||||||
|
$title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...';
|
||||||
|
|
||||||
//Clean the content of the page
|
//Use first image as thumbnail if available, or profile pic fallback
|
||||||
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', "", $post);
|
$thumbnail = $post->find('img', 1)->src;
|
||||||
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', "", $content);
|
if (strlen($thumbnail) == 0)
|
||||||
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content);
|
$thumbnail = $profilePic;
|
||||||
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', "", $content);
|
|
||||||
|
|
||||||
$content = strip_tags($content,"<a><img>");
|
//Build and add final item
|
||||||
|
$item->uri = 'https://facebook.com'.str_replace('&', '&', $post->find('abbr')[0]->parent()->getAttribute('href'));
|
||||||
|
$item->thumbnailUri = $thumbnail;
|
||||||
$date = $post->find("abbr")[0];
|
$item->content = $content;
|
||||||
if(isset($date) && $date->hasAttribute("data-utime")) {
|
$item->title = $title;
|
||||||
$date = $date->getAttribute("data-utime");
|
$item->author = $author;
|
||||||
} else {
|
$item->timestamp = $date;
|
||||||
$date = 0;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
$item->uri = 'https://facebook.com'.str_replace("&", "&", $post->find("abbr")[0]->parent()->getAttribute("href"));
|
|
||||||
|
|
||||||
$item->content = $content;
|
|
||||||
$item->title = $param['u']." | ".strip_tags($content);
|
|
||||||
$item->timestamp = $date;
|
|
||||||
|
|
||||||
$this->items[] = $item;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function getName() {
|
||||||
|
return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge';
|
||||||
|
}
|
||||||
|
|
||||||
}
|
public function getURI() {
|
||||||
|
return 'http://facebook.com';
|
||||||
|
}
|
||||||
|
|
||||||
public function getName(){
|
public function getCacheDuration() {
|
||||||
return 'Facebook Bridge';
|
return 300; // 5 minutes
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getURI(){
|
|
||||||
return 'http://facebook.com';
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration(){
|
|
||||||
return 300; // 5 minutes
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
||||||
|
|
Loading…
Reference in a new issue