diff --git a/bridges/GiphyBridge.php b/bridges/GiphyBridge.php
new file mode 100644
index 00000000..b0a4f899
--- /dev/null
+++ b/bridges/GiphyBridge.php
@@ -0,0 +1,86 @@
+returnError('No results for this query.', 404);
+        }
+        else {
+            $this->returnError('You must specify a search word (?s=...).', 400);
+        }
+
+        // Upper bound on emitted items: GIPHY_LIMIT unless the caller passes ?n=...
+        $max = GIPHY_LIMIT;
+        if (isset($param['n'])) {
+            $max = (int) $param['n'];
+        }
+
+        $limit = 0;
+        // NOTE(review): $kw is the URL-encoded keyword, so a multi-word search
+        // shows up with '+' in the username below and may not match the caption
+        // text it is stripped from -- confirm whether the raw keyword was intended.
+        $kw = urlencode($param['s']);
+        foreach($html->find('div.hoverable-gif') as $entry) {
+            // Enough items collected: nothing after this point can add more.
+            if ($limit >= $max) {
+                break;
+            }
+
+            // The first child of a result tile carries the link to the detail page.
+            // NOTE(review): the lookups below presumably yield null when the
+            // markup does not match (simple_html_dom behaviour -- confirm); such
+            // entries are skipped instead of aborting the whole feed.
+            $node = $entry->first_child();
+            if (!$node) {
+                continue;
+            }
+            $href = $node->getAttribute('href');
+
+            // Every result needs its own request to the detail page.
+            $html2 = file_get_html($base_url . $href) or $this->returnError('No results for this query.', 404);
+            $figure = $html2->getElementByTagName('figure');
+            if (!$figure) {
+                continue;
+            }
+            $img = $figure->firstChild();
+            $caption = $figure->lastChild();
+            if (!$img || !$caption) {
+                continue;
+            }
+
+            $item = new \Item();
+            $item->id = $img->getAttribute('data-gif_id');
+            $item->uri = $img->getAttribute('data-bitly_gif_url');
+            $item->username = 'Giphy - '.ucfirst($kw);
+
+            // Build the title from the caption: collapse whitespace, drop the
+            // 'animated GIF' boilerplate and the keyword, then collapse again.
+            $title = $caption->innertext();
+            $title = preg_replace('/\s+/', ' ',$title);
+            $title = str_replace('animated GIF', '', $title);
+            $title = str_replace($kw, '', $title);
+            $title = preg_replace('/\s+/', ' ',$title);
+            $title = trim($title);
+            if (strlen($title) <= 0) {
+                // Nothing left of the caption: fall back to the GIF id.
+                $title = $item->id;
+            }
+            $item->title = trim($title);
+            // NOTE(review): as written this concatenates three empty strings, so
+            // content is ''; the literals look like they once held markup -- confirm.
+            $item->content =
+                ''
+                .''
+                .'';
+
+            $this->items[] = $item;
+            $limit++;
+        }
+    }
+
+    /** Display name of this bridge. */
+    public function getName(){
+        return 'Giphy Bridge';
+    }
+
+    /** Base URI of the bridged site. */
+    public function getURI(){
+        return 'http://giphy.com/';
+    }
+
+    /** Cache lifetime in seconds. */
+    public function getCacheDuration(){
+        return 300; // 5 minutes
+    }
+
+    /**
+     * Username of the first collected item, or null when no item was collected
+     * (avoids dereferencing a missing array element on an empty feed).
+     */
+    public function getUsername(){
+        return isset($this->items[0]) ? $this->items[0]->username : null;
+    }
+}
diff --git a/bridges/TwitterBridgeTweaked.php b/bridges/TwitterBridgeTweaked.php
new file mode 100644
index 00000000..6d7b7b83
--- /dev/null
+++ b/bridges/TwitterBridgeTweaked.php
@@ -0,0 +1,152 @@
+ 0) ? 
true : false;
+        return $has_tld;
+    }
+
+    /**
+     * Drop every space-separated word that containsTLD() recognises.
+     *
+     * Only words containing a dot are handed to containsTLD(); all other
+     * words are kept untouched. Done in a single pass: the result is the same
+     * as removing one match at a time and starting over.
+     *
+     * @param string $url Text to clean (despite the name, a whole sentence).
+     * @return string The remaining words joined by single spaces.
+     */
+    private function cleaner($url) {
+        $kept = array();
+        foreach (explode(' ', $url) as $word) {
+            // only call containsTLD() if there is a dot
+            if (strpos($word, '.') !== false && $this->containsTLD($word) === true) {
+                continue;
+            }
+            $kept[] = $word;
+        }
+        return implode(' ', $kept);
+    }
+
+    // (c) Kraoc / urlclean
+    // https://github.com/kraoc/Leed-market/blob/master/urlclean/urlclean.plugin.disabled.php
+    /**
+     * Follow redirects to the final URL (when cURL is usable) and strip
+     * xtor / utm_ tracking parameters from it.
+     *
+     * @param string $link URL to resolve.
+     * @return string The resolved and cleaned URL.
+     */
+    private function resolve_url($link) {
+        // fallback to crawl to real url (slowest method and unsecure to privacy)
+        if (function_exists('curl_init') && !ini_get('safe_mode')) {
+            // The handle and the user agent must exist before any option is set.
+            $ch = curl_init();
+            $ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.16 (KHTML, like Gecko) Chrome/24.0.1304.0 Safari/537.16';
+            curl_setopt($ch, CURLOPT_USERAGENT, $ua);
+            curl_setopt($ch, CURLOPT_URL, $link);
+            curl_setopt($ch, CURLOPT_HEADER, true);
+            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+            // >>> anonymization
+            curl_setopt($ch, CURLOPT_COOKIESESSION, true);
+            curl_setopt($ch, CURLOPT_REFERER, '');
+            // <<< anonymization
+            curl_exec($ch);
+            // Keep the original link if cURL cannot report an effective URL.
+            $effective = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
+            if (is_string($effective) && $effective !== '') {
+                $link = $effective;
+            }
+        }
+
+        $link = preg_replace("/[&#?]xtor=(.)+/", "", $link); // remove: xtor
+        $link = preg_replace("/utm_([^&#]|(&))+&*/", "", $link); // remove: utm_
+
+        // cleanup end of url
+        $link = preg_replace("/\?&/", "", $link);
+        if (substr($link, -1) === '?') {
+            $link = substr($link, 0, strlen($link) -1);
+        }
+
+        return $link;
+    }
+
+    /**
+     * Fetch a Twitter search (?q=...) or user timeline (?u=...) page and turn
+     * every div.js-stream-tweet element into an Item appended to $this->items.
+     *
+     * @param array $param Request parameters; 'q' takes precedence over 'u'.
+     */
+    public function collectData(array $param){
+        $html = '';
+        if (isset($param['q'])) { /* keyword search mode */
+            $html = file_get_html('https://twitter.com/search/realtime?q='.urlencode($param['q']).'+include:retweets&src=typd') or $this->returnError('No results for this query.', 404);
+        }
+        elseif (isset($param['u'])) { /* user timeline mode */
+            $html = file_get_html('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnError('Requested username can\'t be found.', 404);
+        }
+        else {
+            $this->returnError('You must specify a keyword (?q=...) or a Twitter username (?u=...).', 400);
+        }
+
+        foreach($html->find('div.js-stream-tweet') as $tweet) {
+            $item = new \Item();
+            // extract username and sanitize
+            $item->username = $tweet->getAttribute('data-screen-name');
+            // extract fullname (pseudonym)
+            $item->fullname = $tweet->getAttribute('data-name');
+            // get avatar link
+            $item->avatar = $tweet->find('img', 0)->src;
+            // get TweetID
+            $item->id = $tweet->getAttribute('data-tweet-id');
+            // get tweet link
+            $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href');
+            // extract tweet timestamp
+            $item->timestamp = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time');
+            // extract plaintext
+            // NOTE(review): the allowed-tags argument of strip_tags() is an empty
+            // string here, so all tags are removed -- confirm that is intended.
+            $item->content_simple = str_replace('href="/', 'href="https://twitter.com/', html_entity_decode(strip_tags($tweet->find('p.js-tweet-text', 0)->innertext, '')));
+
+            // processing content links: prefer the expanded URL and drop
+            // Twitter-specific attributes from every anchor
+            foreach($tweet->find('a') as $link) {
+                if($link->hasAttribute('data-expanded-url') ) {
+                    $link->href = $link->getAttribute('data-expanded-url');
+                }
+                $link->removeAttribute('data-expanded-url');
+                $link->removeAttribute('data-query-source');
+                $link->removeAttribute('rel');
+                $link->removeAttribute('class');
+                $link->removeAttribute('target');
+                $link->removeAttribute('title');
+            }
+
+            // get tweet text
+            // NOTE(review): the literals below contain only 'avatar', a space and
+            // bare line breaks; they look like they once held markup -- confirm.
+            $item->content = 'avatar'.$item->username.' '.$item->fullname.'
'.str_replace('href="/', 'href="https://twitter.com/', $tweet->find('p.js-tweet-text', 0)->innertext).'
';
+            // generate the title
+//            $item->title = $item->fullname . ' (@'. $item->username . ') | ' . $item->content_simple;
+            $item->title = $item->content_simple;
+            $item->title = preg_replace('|https?://www\.[a-z\.0-9]+|i', '', $item->title); // remove http(s) links
+            $item->title = preg_replace('|www\.[a-z\.0-9]+|i', '', $item->title); // remove www. links
+            $item->title = $this->cleaner($item->title); // remove all remaining links
+            $item->title = trim($item->title); // remove extra spaces at beginning and end
+
+            // convert all content links to real ones (one resolve_url() call,
+            // i.e. potentially one HTTP request, per matched URL)
+            $regex = "/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/";
+            $item->content = preg_replace_callback($regex, function($url) {
+                // $url[0] is the full matched URL
+                return $this->resolve_url($url[0]);
+            }, $item->content);
+
+            // put out
+            $this->items[] = $item;
+        }
+    }
+
+    /** Display name of this bridge. */
+    public function getName(){
+        return 'Twitter Bridge Tweaked';
+    }
+
+    /** Base URI of the bridged site. */
+    public function getURI(){
+        return 'http://twitter.com';
+    }
+
+    /** Cache lifetime in seconds. */
+    public function getCacheDuration(){
+        return 300; // 5 minutes
+    }
+
+    /**
+     * Username of the first collected item, or null when no item was collected
+     * (avoids dereferencing a missing array element on an empty feed).
+     */
+    public function getUsername(){
+        return isset($this->items[0]) ? $this->items[0]->username : null;
+    }
+}