diff --git a/README.md b/README.md index e0487e6b..dee69b85 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,10 @@ Alternatively find another Requires minimum PHP 7.4. +```shell +apt install nginx php-fpm php-mbstring php-simplexml php-curl +``` + ```shell cd /var/www composer create-project -v --no-dev rss-bridge/rss-bridge @@ -334,10 +338,11 @@ This is the feed item structure that bridges are expected to produce. ### Cache backends -* `file` -* `sqlite` -* `memcached` -* `null` +* `File` +* `SQLite` +* `Memcached` +* `Array` +* `Null` ### Licenses diff --git a/actions/ConnectivityAction.php b/actions/ConnectivityAction.php index 604b7806..3bc82a9d 100644 --- a/actions/ConnectivityAction.php +++ b/actions/ConnectivityAction.php @@ -34,7 +34,7 @@ class ConnectivityAction implements ActionInterface public function execute(array $request) { if (!Debug::isEnabled()) { - throw new \Exception('This action is only available in debug mode!'); + return new Response('This action is only available in debug mode!'); } $bridgeName = $request['bridge'] ?? 
null; @@ -43,7 +43,7 @@ class ConnectivityAction implements ActionInterface } $bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName); if (!$bridgeClassName) { - throw new \Exception(sprintf('Bridge not found: %s', $bridgeName)); + return new Response('Bridge not found', 404); } return $this->reportBridgeConnectivity($bridgeClassName); } @@ -54,29 +54,25 @@ class ConnectivityAction implements ActionInterface throw new \Exception('Bridge is not whitelisted!'); } - $retVal = [ - 'bridge' => $bridgeClassName, - 'successful' => false, - 'http_code' => 200, - ]; - $bridge = $this->bridgeFactory->create($bridgeClassName); $curl_opts = [ - CURLOPT_CONNECTTIMEOUT => 5 + CURLOPT_CONNECTTIMEOUT => 5, + CURLOPT_FOLLOWLOCATION => true, + ]; + $result = [ + 'bridge' => $bridgeClassName, + 'successful' => false, + 'http_code' => null, ]; try { - $reply = getContents($bridge::URI, [], $curl_opts, true); - - if ($reply['code'] === 200) { - $retVal['successful'] = true; - if (strpos(implode('', $reply['status_lines']), '301 Moved Permanently')) { - $retVal['http_code'] = 301; - } + $response = getContents($bridge::URI, [], $curl_opts, true); + $result['http_code'] = $response['code']; + if (in_array($response['code'], [200])) { + $result['successful'] = true; } } catch (\Exception $e) { - $retVal['successful'] = false; } - return new Response(Json::encode($retVal), 200, ['Content-Type' => 'text/json']); + return new Response(Json::encode($result), 200, ['content-type' => 'text/json']); } } diff --git a/actions/DetectAction.php b/actions/DetectAction.php index 6c9fa22d..49b7ced7 100644 --- a/actions/DetectAction.php +++ b/actions/DetectAction.php @@ -45,7 +45,7 @@ class DetectAction implements ActionInterface $bridgeParams['format'] = $format; $url = '?action=display&' . 
http_build_query($bridgeParams); - return new Response('', 301, ['Location' => $url]); + return new Response('', 301, ['location' => $url]); } throw new \Exception('No bridge found for given URL: ' . $targetURL); diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 7b2efec1..7c59b3d5 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -10,50 +10,41 @@ class DisplayAction implements ActionInterface return new Response('503 Service Unavailable', 503); } $this->cache = RssBridge::getCache(); - $this->cache->setScope('http'); - $this->cache->setKey($request); - // avg timeout of 20m - $timeout = 60 * 15 + rand(1, 60 * 10); + $cacheKey = 'http_' . json_encode($request); /** @var Response $cachedResponse */ - $cachedResponse = $this->cache->loadData($timeout); - if ($cachedResponse && !Debug::isEnabled()) { - //Logger::info(sprintf('Returning cached (http) response: %s', $cachedResponse->getBody())); + $cachedResponse = $this->cache->get($cacheKey); + if ($cachedResponse) { + $ifModifiedSince = $_SERVER['HTTP_IF_MODIFIED_SINCE'] ?? null; + $lastModified = $cachedResponse->getHeader('last-modified'); + if ($ifModifiedSince && $lastModified) { + $lastModified = new \DateTimeImmutable($lastModified); + $lastModifiedTimestamp = $lastModified->getTimestamp(); + $modifiedSince = strtotime($ifModifiedSince); + if ($lastModifiedTimestamp <= $modifiedSince) { + $modificationTimeGMT = gmdate('D, d M Y H:i:s ', $lastModifiedTimestamp); + return new Response('', 304, ['last-modified' => $modificationTimeGMT . 
'GMT']); + } + } return $cachedResponse; } - $response = $this->createResponse($request); - if (in_array($response->getCode(), [429, 503])) { - //Logger::info(sprintf('Storing cached (http) response: %s', $response->getBody())); - $this->cache->setScope('http'); - $this->cache->setKey($request); - $this->cache->saveData($response); - } - return $response; - } - - private function createResponse(array $request) - { - $bridgeFactory = new BridgeFactory(); - $formatFactory = new FormatFactory(); $bridgeName = $request['bridge'] ?? null; - $format = $request['format'] ?? null; - + if (!$bridgeName) { + return new Response('Missing bridge param', 400); + } + $bridgeFactory = new BridgeFactory(); $bridgeClassName = $bridgeFactory->createBridgeClassName($bridgeName); if (!$bridgeClassName) { - throw new \Exception(sprintf('Bridge not found: %s', $bridgeName)); + return new Response('Bridge not found', 404); } + $format = $request['format'] ?? null; if (!$format) { - throw new \Exception('You must specify a format!'); + return new Response('You must specify a format!', 400); } if (!$bridgeFactory->isEnabled($bridgeClassName)) { - throw new \Exception('This bridge is not whitelisted'); + return new Response('This bridge is not whitelisted', 400); } - $format = $formatFactory->create($format); - - $bridge = $bridgeFactory->create($bridgeClassName); - $bridge->loadConfiguration(); - $noproxy = $request['_noproxy'] ?? null; if ( Configuration::getConfig('proxy', 'url') @@ -64,147 +55,100 @@ class DisplayAction implements ActionInterface define('NOPROXY', true); } - $cacheTimeout = $request['_cache_timeout'] ?? 
null; - if (Configuration::getConfig('cache', 'custom_timeout') && $cacheTimeout) { - $cacheTimeout = (int) $cacheTimeout; - } else { - // At this point the query argument might still be in the url but it won't be used - $cacheTimeout = $bridge->getCacheTimeout(); + $bridge = $bridgeFactory->create($bridgeClassName); + $formatFactory = new FormatFactory(); + $format = $formatFactory->create($format); + + $response = $this->createResponse($request, $bridge, $format); + + if ($response->getCode() === 200) { + $ttl = $request['_cache_timeout'] ?? null; + if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) { + $ttl = (int) $ttl; + } else { + $ttl = $bridge->getCacheTimeout(); + } + $this->cache->set($cacheKey, $response, $ttl); } - // Remove parameters that don't concern bridges - $bridge_params = array_diff_key( - $request, - array_fill_keys( - [ - 'action', - 'bridge', - 'format', - '_noproxy', - '_cache_timeout', - '_error_time' - ], - '' - ) - ); + if (in_array($response->getCode(), [429, 503])) { + $this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10)); // average 20m + } - // Remove parameters that don't concern caches - $cache_params = array_diff_key( - $request, - array_fill_keys( - [ - 'action', - 'format', - '_noproxy', - '_cache_timeout', - '_error_time' - ], - '' - ) - ); - - $this->cache->setScope(''); - $this->cache->setKey($cache_params); + if ($response->getCode() === 500) { + $this->cache->set($cacheKey, $response, 60 * 15); + } + if (rand(1, 100) === 2) { + $this->cache->prune(); + } + return $response; + } + private function createResponse(array $request, BridgeInterface $bridge, FormatInterface $format) + { $items = []; $infos = []; - $feed = $this->cache->loadData($cacheTimeout); - - if ($feed && !Debug::isEnabled()) { - if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) { - $modificationTime = $this->cache->getTime(); - // The client wants to know if the feed has changed since its last check - $modifiedSince = 
strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']); - if ($modificationTime <= $modifiedSince) { - $modificationTimeGMT = gmdate('D, d M Y H:i:s ', $modificationTime); - return new Response('', 304, ['Last-Modified' => $modificationTimeGMT . 'GMT']); + try { + $bridge->loadConfiguration(); + // Remove parameters that don't concern bridges + $bridgeData = array_diff_key($request, array_fill_keys(['action', 'bridge', 'format', '_noproxy', '_cache_timeout', '_error_time'], '')); + $bridge->setDatas($bridgeData); + $bridge->collectData(); + $items = $bridge->getItems(); + if (isset($items[0]) && is_array($items[0])) { + $feedItems = []; + foreach ($items as $item) { + $feedItems[] = new FeedItem($item); + } + $items = $feedItems; + } + $infos = [ + 'name' => $bridge->getName(), + 'uri' => $bridge->getURI(), + 'donationUri' => $bridge->getDonationURI(), + 'icon' => $bridge->getIcon() + ]; + } catch (\Exception $e) { + $errorOutput = Configuration::getConfig('error', 'output'); + $reportLimit = Configuration::getConfig('error', 'report_limit'); + if ($e instanceof HttpException) { + // Reproduce (and log) these responses regardless of error output and report limit + if ($e->getCode() === 429) { + Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); + return new Response('429 Too Many Requests', 429); + } + if ($e->getCode() === 503) { + Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); + return new Response('503 Service Unavailable', 503); } } - - if (isset($feed['items']) && isset($feed['extraInfos'])) { - foreach ($feed['items'] as $item) { - $items[] = new FeedItem($item); - } - $infos = $feed['extraInfos']; + Logger::error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]); + $errorCount = 1; + if ($reportLimit > 1) { + $errorCount = $this->logBridgeError($bridge->getName(), $e->getCode()); } - } else { - 
try { - $bridge->setDatas($bridge_params); - $bridge->collectData(); - $items = $bridge->getItems(); - if (isset($items[0]) && is_array($items[0])) { - $feedItems = []; - foreach ($items as $item) { - $feedItems[] = new FeedItem($item); - } - $items = $feedItems; - } - $infos = [ - 'name' => $bridge->getName(), - 'uri' => $bridge->getURI(), - 'donationUri' => $bridge->getDonationURI(), - 'icon' => $bridge->getIcon() - ]; - } catch (\Exception $e) { - $errorOutput = Configuration::getConfig('error', 'output'); - $reportLimit = Configuration::getConfig('error', 'report_limit'); - if ($e instanceof HttpException) { - // Reproduce (and log) these responses regardless of error output and report limit - if ($e->getCode() === 429) { - Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e))); - return new Response('429 Too Many Requests', 429); - } - if ($e->getCode() === 503) { - Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e))); - return new Response('503 Service Unavailable', 503); - } - // Might want to cache other codes such as 504 Gateway Timeout - } - if (in_array($errorOutput, ['feed', 'none'])) { - Logger::error(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e)), ['e' => $e]); - } - $errorCount = 1; - if ($reportLimit > 1) { - $errorCount = $this->logBridgeError($bridge->getName(), $e->getCode()); - } - // Let clients know about the error if we are passed the report limit - if ($errorCount >= $reportLimit) { - if ($errorOutput === 'feed') { - // Render the exception as a feed item - $items[] = $this->createFeedItemFromException($e, $bridge); - } elseif ($errorOutput === 'http') { - // Rethrow so that the main exception handler in RssBridge.php produces an HTTP 500 - throw $e; - } elseif ($errorOutput === 'none') { - // Do nothing (produces an empty feed) - } else { - // Do nothing, unknown error 
output? Maybe throw exception or validate in Configuration.php - } + // Let clients know about the error if we are passed the report limit + if ($errorCount >= $reportLimit) { + if ($errorOutput === 'feed') { + // Render the exception as a feed item + $items[] = $this->createFeedItemFromException($e, $bridge); + } elseif ($errorOutput === 'http') { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['e' => $e]), 500); + } elseif ($errorOutput === 'none') { + // Do nothing (produces an empty feed) } } - - // Unfortunately need to set scope and key again because they might be modified - $this->cache->setScope(''); - $this->cache->setKey($cache_params); - $this->cache->saveData([ - 'items' => array_map(function (FeedItem $item) { - return $item->toArray(); - }, $items), - 'extraInfos' => $infos - ]); - $this->cache->purgeCache(); } $format->setItems($items); $format->setExtraInfos($infos); - $newModificationTime = $this->cache->getTime(); - $format->setLastModified($newModificationTime); - $headers = []; - if ($newModificationTime) { - $headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $newModificationTime) . 'GMT'; - } - $headers['Content-Type'] = $format->getMimeType() . '; charset=' . $format->getCharset(); + $now = time(); + $format->setLastModified($now); + $headers = [ + 'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT', + 'content-type' => $format->getMimeType() . '; charset=' . $format->getCharset(), + ]; return new Response($format->stringify(), 200, $headers); } @@ -234,9 +178,8 @@ class DisplayAction implements ActionInterface private function logBridgeError($bridgeName, $code) { - $this->cache->setScope('error_reporting'); - $this->cache->setkey([$bridgeName . '_' . $code]); - $report = $this->cache->loadData(); + $cacheKey = 'error_reporting_' . $bridgeName . '_' . 
$code; + $report = $this->cache->get($cacheKey); if ($report) { $report = Json::decode($report); $report['time'] = time(); @@ -248,7 +191,8 @@ class DisplayAction implements ActionInterface 'count' => 1, ]; } - $this->cache->saveData(Json::encode($report)); + $ttl = 86400 * 5; + $this->cache->set($cacheKey, Json::encode($report), $ttl); return $report['count']; } diff --git a/actions/ListAction.php b/actions/ListAction.php index 6ce7e33e..9025bf6e 100644 --- a/actions/ListAction.php +++ b/actions/ListAction.php @@ -37,6 +37,6 @@ class ListAction implements ActionInterface ]; } $list->total = count($list->bridges); - return new Response(Json::encode($list), 200, ['Content-Type' => 'application/json']); + return new Response(Json::encode($list), 200, ['content-type' => 'application/json']); } } diff --git a/actions/SetBridgeCacheAction.php b/actions/SetBridgeCacheAction.php index 416f2378..a8e712d4 100644 --- a/actions/SetBridgeCacheAction.php +++ b/actions/SetBridgeCacheAction.php @@ -19,7 +19,10 @@ class SetBridgeCacheAction implements ActionInterface $authenticationMiddleware = new ApiAuthenticationMiddleware(); $authenticationMiddleware($request); - $key = $request['key'] or returnClientError('You must specify key!'); + $key = $request['key'] ?? null; + if (!$key) { + returnClientError('You must specify key!'); + } $bridgeFactory = new BridgeFactory(); @@ -40,13 +43,10 @@ class SetBridgeCacheAction implements ActionInterface $value = $request['value']; $cache = RssBridge::getCache(); - $cache->setScope(get_class($bridge)); - if (!is_array($key)) { - // not sure if $key is an array when it comes in from request - $key = [$key]; - } - $cache->setKey($key); - $cache->saveData($value); + + $cacheKey = get_class($bridge) . '_' . 
$key; + $ttl = 86400 * 3; + $cache->set($cacheKey, $value, $ttl); header('Content-Type: text/plain'); echo 'done'; diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 57e12fbd..e30c6b70 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -33,6 +33,7 @@ class AO3Bridge extends BridgeAbstract ], ] ]; + private $title; public function collectData() { @@ -94,11 +95,12 @@ class AO3Bridge extends BridgeAbstract $url = self::URI . "/works/$id/navigate"; $httpClient = RssBridge::getHttpClient(); + $version = 'v0.0.1'; $response = $httpClient->request($url, [ - 'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)', + 'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)", ]); - $html = \str_get_html($response['body']); + $html = \str_get_html($response->getBody()); $html = defaultLinkTo($html, self::URI); $this->title = $html->find('h2 a', 0)->plaintext; diff --git a/bridges/BugzillaBridge.php b/bridges/BugzillaBridge.php index 9b4d1adc..c2dc8d40 100644 --- a/bridges/BugzillaBridge.php +++ b/bridges/BugzillaBridge.php @@ -159,7 +159,7 @@ class BugzillaBridge extends BridgeAbstract protected function getUser($user) { // Check if the user endpoint is available - if ($this->loadCacheValue($this->instance . 'userEndpointClosed', 86400)) { + if ($this->loadCacheValue($this->instance . 'userEndpointClosed')) { return $user; } diff --git a/bridges/ElloBridge.php b/bridges/ElloBridge.php index 4cc1858b..9017bc11 100644 --- a/bridges/ElloBridge.php +++ b/bridges/ElloBridge.php @@ -114,18 +114,17 @@ class ElloBridge extends BridgeAbstract private function getAPIKey() { $cache = RssBridge::getCache(); - $cache->setScope('ElloBridge'); - $cache->setKey(['key']); - $key = $cache->loadData(); + $cacheKey = 'ElloBridge_key'; + $apiKey = $cache->get($cacheKey); - if ($key == null) { - $keyInfo = getContents(self::URI . 
'api/webapp-token') or - returnServerError('Unable to get token.'); - $key = json_decode($keyInfo)->token->access_token; - $cache->saveData($key); + if (!$apiKey) { + $keyInfo = getContents(self::URI . 'api/webapp-token') or returnServerError('Unable to get token.'); + $apiKey = json_decode($keyInfo)->token->access_token; + $ttl = 60 * 60 * 20; + $cache->set($cacheKey, $apiKey, $ttl); } - return $key; + return $apiKey; } public function getName() diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 0f644c4a..9a846fb1 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -99,23 +99,22 @@ class InstagramBridge extends BridgeAbstract } $cache = RssBridge::getCache(); - $cache->setScope('InstagramBridge'); - $cache->setKey([$username]); - $key = $cache->loadData(); + $cacheKey = 'InstagramBridge_' . $username; + $pk = $cache->get($cacheKey); - if ($key == null) { + if (!$pk) { $data = $this->getContents(self::URI . 'web/search/topsearch/?query=' . $username); foreach (json_decode($data)->users as $user) { if (strtolower($user->user->username) === strtolower($username)) { - $key = $user->user->pk; + $pk = $user->user->pk; } } - if ($key == null) { + if (!$pk) { returnServerError('Unable to find username in search result.'); } - $cache->saveData($key); + $cache->set($cacheKey, $pk); } - return $key; + return $pk; } public function collectData() diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 855aae08..81401be9 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -100,7 +100,7 @@ class MastodonBridge extends BridgeAbstract // We fetch the boosted content. try { $rtContent = $this->fetchAP($content['object']); - $rtUser = $this->loadCacheValue($rtContent['attributedTo'], 86400); + $rtUser = $this->loadCacheValue($rtContent['attributedTo']); if (!isset($rtUser)) { // We fetch the author, since we cannot always assume the format of the URL. 
$user = $this->fetchAP($rtContent['attributedTo']); diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index bd60243f..196f7d20 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -72,8 +72,30 @@ class RedditBridge extends BridgeAbstract ] ] ]; + private CacheInterface $cache; + + public function __construct() + { + $this->cache = RssBridge::getCache(); + } public function collectData() + { + $cacheKey = 'reddit_rate_limit'; + if ($this->cache->get($cacheKey)) { + throw new HttpException('429 Too Many Requests', 429); + } + try { + $this->collectDataInternal(); + } catch (HttpException $e) { + if ($e->getCode() === 429) { + $this->cache->set($cacheKey, true, 60 * 16); + } + throw $e; // always rethrow: only a 429 arms the rate-limit flag, other HTTP errors must not be swallowed + } + } + + private function collectDataInternal(): void { $user = false; $comments = false; diff --git a/bridges/SoundcloudBridge.php b/bridges/SoundcloudBridge.php index 0bd9a2b0..5664761b 100644 --- a/bridges/SoundcloudBridge.php +++ b/bridges/SoundcloudBridge.php @@ -36,7 +36,7 @@ class SoundCloudBridge extends BridgeAbstract private $feedTitle = null; private $feedIcon = null; - private $cache = null; + private CacheInterface $cache; private $clientIdRegex = '/client_id.*?"(.+?)"/'; private $widgetRegex = '/widget-.+?\.js/'; @@ -44,8 +44,6 @@ class SoundCloudBridge extends BridgeAbstract public function collectData() { $this->cache = RssBridge::getCache(); - $this->cache->setScope('SoundCloudBridge'); - $this->cache->setKey(['client_id']); $res = $this->getUser($this->getInput('u')); @@ -121,11 +119,9 @@ HTML; private function getClientID() { - $this->cache->setScope('SoundCloudBridge'); - $this->cache->setKey(['client_id']); - $clientID = $this->cache->loadData(); + $clientID = $this->cache->get('SoundCloudBridge_client_id'); - if ($clientID == null) { + if (!$clientID) { return $this->refreshClientID(); } else { return $clientID; @@ -151,10 +147,7 @@ HTML; if (preg_match($this->clientIdRegex, $widgetJS, $matches)) { $clientID = $matches[1]; - 
$this->cache->setScope('SoundCloudBridge'); - $this->cache->setKey(['client_id']); - $this->cache->saveData($clientID); - + $this->cache->set('SoundCloudBridge_client_id', $clientID); return $clientID; } } diff --git a/bridges/SpotifyBridge.php b/bridges/SpotifyBridge.php index 7b7e2b1d..eb847f3d 100644 --- a/bridges/SpotifyBridge.php +++ b/bridges/SpotifyBridge.php @@ -279,10 +279,9 @@ class SpotifyBridge extends BridgeAbstract private function fetchAccessToken() { $cache = RssBridge::getCache(); - $cacheKey = sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret')); - $cache->setScope('SpotifyBridge'); - $cache->setKey([$cacheKey]); - $token = $cache->loadData(3600); + $cacheKey = sprintf('SpotifyBridge:%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret')); + + $token = $cache->get($cacheKey); if ($token) { $this->token = $token; } else { @@ -294,9 +293,8 @@ class SpotifyBridge extends BridgeAbstract ]); $data = Json::decode($json); $this->token = $data['access_token']; - $cache->setScope('SpotifyBridge'); - $cache->setKey([$cacheKey]); - $cache->saveData($this->token); + + $cache->set($cacheKey, $this->token, 3600); } } diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 8470dcf7..b9586150 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -594,156 +594,4 @@ EOD; { return (intval($tweet1['id']) < intval($tweet2['id']) ? 
1 : -1); } - - //The aim of this function is to get an API key and a guest token - //This function takes 2 requests, and therefore is cached - private function getApiKey($forceNew = 0) - { - $r_cache = RssBridge::getCache(); - $scope = 'TwitterBridge'; - $r_cache->setScope($scope); - $r_cache->setKey(['refresh']); - $data = $r_cache->loadData(); - - $refresh = null; - if ($data === null) { - $refresh = time(); - $r_cache->saveData($refresh); - } else { - $refresh = $data; - } - - $cacheFactory = new CacheFactory(); - - $cache = RssBridge::getCache(); - $cache->setScope($scope); - $cache->setKey(['api_key']); - $data = $cache->loadData(); - - $apiKey = null; - if ($forceNew || $data === null || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) { - $twitterPage = getContents('https://twitter.com'); - - $jsLink = false; - $jsMainRegexArray = [ - '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m', - '/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m', - '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m', - '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m', - ]; - foreach ($jsMainRegexArray as $jsMainRegex) { - if (preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0)) { - $jsLink = $jsMainMatches[0][0]; - break; - } - } - if (!$jsLink) { - returnServerError('Could not locate main.js link'); - } - - $jsContent = getContents($jsLink); - $apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m'; - preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0); - $apiKey = $apiKeyMatches[0][0]; - $cache->saveData($apiKey); - } else { - $apiKey = $data; - } - - $gt_cache = RssBridge::getCache(); - $gt_cache->setScope($scope); - $gt_cache->setKey(['guest_token']); - $guestTokenUses = $gt_cache->loadData(); - - $guestToken = null; - if ( - $forceNew || $guestTokenUses === null || !is_array($guestTokenUses) || count($guestTokenUses) != 
2 - || $guestTokenUses[0] <= 0 || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY - ) { - $guestToken = $this->getGuestToken($apiKey); - if ($guestToken === null) { - if ($guestTokenUses === null) { - returnServerError('Could not parse guest token'); - } else { - $guestToken = $guestTokenUses[1]; - } - } else { - $gt_cache->saveData([self::GUEST_TOKEN_USES, $guestToken]); - $r_cache->saveData(time()); - } - } else { - $guestTokenUses[0] -= 1; - $gt_cache->saveData($guestTokenUses); - $guestToken = $guestTokenUses[1]; - } - - $this->apiKey = $apiKey; - $this->guestToken = $guestToken; - $this->authHeaders = [ - 'authorization: Bearer ' . $apiKey, - 'x-guest-token: ' . $guestToken, - ]; - - return [$apiKey, $guestToken]; - } - - // Get a guest token. This is different to an API key, - // and it seems to change more regularly than the API key. - private function getGuestToken($apiKey) - { - $headers = [ - 'authorization: Bearer ' . $apiKey, - ]; - $opts = [ - CURLOPT_POST => 1, - ]; - - try { - $pageContent = getContents('https://api.twitter.com/1.1/guest/activate.json', $headers, $opts, true); - $guestToken = json_decode($pageContent['content'])->guest_token; - } catch (Exception $e) { - $guestToken = null; - } - return $guestToken; - } - - /** - * Tries to make an API call to twitter. - * @param $api string API entry point - * @param $params array additional URI parmaeters - * @return object json data - */ - private function makeApiCall($api, $params) - { - $uri = self::API_URI . $api . '?' . 
http_build_query($params); - - $retries = 1; - $retry = 0; - do { - $retry = 0; - - try { - $result = getContents($uri, $this->authHeaders, [], true); - } catch (HttpException $e) { - switch ($e->getCode()) { - case 401: - // fall-through - case 403: - if ($retries) { - $retries--; - $retry = 1; - $this->getApiKey(1); - continue 2; - } - // fall-through - default: - throw $e; - } - } - } while ($retry); - - $data = json_decode($result['content']); - - return $data; - } } diff --git a/bridges/WordPressMadaraBridge.php b/bridges/WordPressMadaraBridge.php index c5ff54b5..4325075c 100644 --- a/bridges/WordPressMadaraBridge.php +++ b/bridges/WordPressMadaraBridge.php @@ -117,7 +117,7 @@ The default URI shows the Madara demo page.'; protected function getMangaInfo($url) { $url_cache = 'TitleInfo_' . preg_replace('/[^\w]/', '.', rtrim($url, '/')); - $cache = $this->loadCacheValue($url_cache, 86400); + $cache = $this->loadCacheValue($url_cache); if (isset($cache)) { return $cache; } diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 54a38d98..8e3ac540 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -77,6 +77,138 @@ class YoutubeBridge extends BridgeAbstract private $channel_name = ''; // This took from repo BetterVideoRss of VerifiedJoseph. const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore + private CacheInterface $cache; + + public function __construct() + { + $this->cache = RssBridge::getCache(); + } + + private function collectDataInternal() + { + $xml = ''; + $html = ''; + $url_feed = ''; + $url_listing = ''; + + if ($this->getInput('u')) { + /* User and Channel modes */ + $this->request = $this->getInput('u'); + $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request); + $url_listing = self::URI . 'user/' . urlencode($this->request) . 
'/videos'; + } elseif ($this->getInput('c')) { + $this->request = $this->getInput('c'); + $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); + $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos'; + } elseif ($this->getInput('custom')) { + $this->request = $this->getInput('custom'); + $url_listing = self::URI . urlencode($this->request) . '/videos'; + } + + if (!empty($url_feed) || !empty($url_listing)) { + $this->feeduri = $url_listing; + if (!empty($this->getInput('custom'))) { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; + $this->iconURL = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url; + } + if (!$this->skipFeeds()) { + $html = $this->ytGetSimpleHTMLDOM($url_feed); + $this->ytBridgeParseXmlFeed($html); + } else { + if (empty($this->getInput('custom'))) { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + } + $channel_id = ''; + if (isset($jsonData->contents)) { + $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents; + // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + $this->parseJSONListing($jsonData); + } else { + returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); + } + } + $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + } elseif ($this->getInput('p')) { + /* playlist mode */ + // TODO: this mode makes a lot of excess video query requests. 
+ // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" + // This cache will be used to find out, which videos to fetch + // to make feed of 15 items or more, if there a lot of videos published on that date. + $this->request = $this->getInput('p'); + $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request); + $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + // TODO: this method returns only first 100 video items + // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; + $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; + $item_count = count($jsonData); + + if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { + $this->ytBridgeParseXmlFeed($xml); + } else { + $this->parseJSONListing($jsonData); + } + $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + usort($this->items, function ($item1, $item2) { + if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) { + $item1['timestamp'] = strtotime($item1['timestamp']); + $item2['timestamp'] = strtotime($item2['timestamp']); + } + return $item2['timestamp'] - $item1['timestamp']; + }); + } elseif ($this->getInput('s')) { + /* search mode */ + $this->request = $this->getInput('s'); + $url_listing = self::URI + . 'results?search_query=' + . urlencode($this->request) + . 
'&sp=CAI%253D'; + + $html = $this->ytGetSimpleHTMLDOM($url_listing); + + $jsonData = $this->getJSONData($html); + $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; + $jsonData = $jsonData->sectionListRenderer->contents; + foreach ($jsonData as $data) { + // Search result includes some ads, have to filter them + if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) { + $jsonData = $data->itemSectionRenderer->contents; + break; + } + } + $this->parseJSONListing($jsonData); + $this->feeduri = $url_listing; + $this->feedName = 'Search: ' . $this->request; + } else { + /* no valid mode */ + returnClientError("You must either specify either:\n - YouTube + username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)"); + } + } + + public function collectData() + { + $cacheKey = 'youtube_rate_limit'; + if ($this->cache->get($cacheKey)) { + throw new HttpException('429 Too Many Requests', 429); + } + try { + $this->collectDataInternal(); + } catch (HttpException $e) { + if ($e->getCode() === 429) { + $this->cache->set($cacheKey, true, 60 * 16); + } + throw $e; // always rethrow: only a 429 arms the rate-limit flag, other HTTP errors must not be swallowed + } + } private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time) { @@ -153,7 +285,8 @@ class YoutubeBridge extends BridgeAbstract $item['timestamp'] = $time; $item['uri'] = self::URI . 'watch?v=' . $vid; if (!$thumbnail) { - $thumbnail = '0'; // Fallback to default thumbnail if there aren't any provided. + // Fallback to default thumbnail if there aren't any provided. + $thumbnail = '0'; } $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail . '.jpg'; $item['content'] = '
' . $desc; @@ -315,111 +448,6 @@ class YoutubeBridge extends BridgeAbstract } } - public function collectData() - { - $xml = ''; - $html = ''; - $url_feed = ''; - $url_listing = ''; - - if ($this->getInput('u')) { /* User and Channel modes */ - $this->request = $this->getInput('u'); - $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request); - $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos'; - } elseif ($this->getInput('c')) { - $this->request = $this->getInput('c'); - $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); - $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos'; - } elseif ($this->getInput('custom')) { - $this->request = $this->getInput('custom'); - $url_listing = self::URI . urlencode($this->request) . '/videos'; - } - - if (!empty($url_feed) || !empty($url_listing)) { - $this->feeduri = $url_listing; - if (!empty($this->getInput('custom'))) { - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; - $this->iconURL = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url; - } - if (!$this->skipFeeds()) { - $html = $this->ytGetSimpleHTMLDOM($url_feed); - $this->ytBridgeParseXmlFeed($html); - } else { - if (empty($this->getInput('custom'))) { - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - } - $channel_id = ''; - if (isset($jsonData->contents)) { - $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; - $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents; - // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; - $this->parseJSONListing($jsonData); - } else { - returnServerError('Unable to get data from YouTube. Username/Channel: ' . 
$this->request); - } - } - $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); - } elseif ($this->getInput('p')) { /* playlist mode */ - // TODO: this mode makes a lot of excess video query requests. - // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" - // This cache will be used to find out, which videos to fetch - // to make feed of 15 items or more, if there a lot of videos published on that date. - $this->request = $this->getInput('p'); - $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request); - $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - // TODO: this method returns only first 100 video items - // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; - $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; - $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; - $item_count = count($jsonData); - - if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { - $this->ytBridgeParseXmlFeed($xml); - } else { - $this->parseJSONListing($jsonData); - } - $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() - usort($this->items, function ($item1, $item2) { - if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) { - $item1['timestamp'] = strtotime($item1['timestamp']); - $item2['timestamp'] = strtotime($item2['timestamp']); - } - return $item2['timestamp'] - $item1['timestamp']; - }); - } elseif ($this->getInput('s')) { /* search mode */ - $this->request = $this->getInput('s'); - $url_listing = self::URI - . 
'results?search_query=' - . urlencode($this->request) - . '&sp=CAI%253D'; - - $html = $this->ytGetSimpleHTMLDOM($url_listing); - - $jsonData = $this->getJSONData($html); - $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; - $jsonData = $jsonData->sectionListRenderer->contents; - foreach ($jsonData as $data) { // Search result includes some ads, have to filter them - if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) { - $jsonData = $data->itemSectionRenderer->contents; - break; - } - } - $this->parseJSONListing($jsonData); - $this->feeduri = $url_listing; - $this->feedName = 'Search: ' . $this->request; // feedName will be used by getName() - } else { /* no valid mode */ - returnClientError("You must either specify either:\n - YouTube - username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)"); - } - } - private function skipFeeds() { return ($this->getInput('duration_min') || $this->getInput('duration_max')); @@ -438,14 +466,13 @@ class YoutubeBridge extends BridgeAbstract public function getName() { - // Name depends on queriedContext: switch ($this->queriedContext) { case 'By username': case 'By channel id': case 'By custom name': case 'By playlist Id': case 'Search result': - return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right? + return htmlspecialchars_decode($this->feedName) . ' - YouTube'; default: return parent::getName(); } diff --git a/caches/ArrayCache.php b/caches/ArrayCache.php new file mode 100644 index 00000000..efce4f35 --- /dev/null +++ b/caches/ArrayCache.php @@ -0,0 +1,52 @@ +data[$key] ?? 
null; + if (!$item) { + return $default; + } + $expiration = $item['expiration']; + if ($expiration === 0 || $expiration > time()) { + return $item['value']; + } + $this->delete($key); + return $default; + } + + public function set(string $key, $value, int $ttl = null): void + { + $this->data[$key] = [ + 'key' => $key, + 'value' => $value, + 'expiration' => $ttl === null ? 0 : time() + $ttl, + ]; + } + + public function delete(string $key): void + { + unset($this->data[$key]); + } + + public function clear(): void + { + $this->data = []; + } + + public function prune(): void + { + foreach ($this->data as $key => $item) { + $expiration = $item['expiration']; + if ($expiration === 0 || $expiration > time()) { + continue; + } + $this->delete($key); + } + } +} diff --git a/caches/FileCache.php b/caches/FileCache.php index 6e150cb4..1495971a 100644 --- a/caches/FileCache.php +++ b/caches/FileCache.php @@ -1,13 +1,10 @@ config['path'] = rtrim($this->config['path'], '/') . '/'; } - public function getConfig() + public function get(string $key, $default = null) { - return $this->config; + $cacheFile = $this->createCacheFile($key); + if (!file_exists($cacheFile)) { + return $default; + } + $item = unserialize(file_get_contents($cacheFile)); + if ($item === false) { + Logger::warning(sprintf('Failed to unserialize: %s', $cacheFile)); + $this->delete($key); + return $default; + } + $expiration = $item['expiration']; + if ($expiration === 0 || $expiration > time()) { + return $item['value']; + } + $this->delete($key); + return $default; } - public function loadData(int $timeout = 86400) + public function set($key, $value, int $ttl = null): void { - clearstatcache(); - if (!file_exists($this->getCacheFile())) { - return null; - } - $modificationTime = filemtime($this->getCacheFile()); - if (time() - $timeout < $modificationTime) { - $data = unserialize(file_get_contents($this->getCacheFile())); - if ($data === false) { - Logger::warning(sprintf('Failed to unserialize: %s', 
$this->getCacheFile())); - // Intentionally not throwing an exception - return null; - } - return $data; - } - // It's a good idea to delete the expired item here, but commented out atm - // unlink($this->getCacheFile()); - return null; - } - - public function saveData($data): void - { - $bytes = file_put_contents($this->getCacheFile(), serialize($data), LOCK_EX); + $item = [ + 'key' => $key, + 'value' => $value, + 'expiration' => $ttl === null ? 0 : time() + $ttl, + ]; + $cacheFile = $this->createCacheFile($key); + $bytes = file_put_contents($cacheFile, serialize($item), LOCK_EX); if ($bytes === false) { - throw new \Exception(sprintf('Failed to write to: %s', $this->getCacheFile())); + // Consider just logging the error here + throw new \Exception(sprintf('Failed to write to: %s', $cacheFile)); } } - public function getTime(): ?int + public function delete(string $key): void { - clearstatcache(); - $cacheFile = $this->getCacheFile(); - if (file_exists($cacheFile)) { - $time = filemtime($cacheFile); - if ($time !== false) { - return $time; - } - return null; - } - - return null; + unlink($this->createCacheFile($key)); } - public function purgeCache(int $timeout = 86400): void + public function clear(): void + { + foreach (scandir($this->config['path']) as $filename) { + $cacheFile = $this->config['path'] . $filename; + $excluded = ['.' => true, '..' => true, '.gitkeep' => true]; + if (isset($excluded[$filename]) || !is_file($cacheFile)) { + continue; + } + unlink($cacheFile); + } + } + + public function prune(): void { if (! $this->config['enable_purge']) { return; } - - $cachePath = $this->getScope(); - if (!file_exists($cachePath)) { - return; - } - $cacheIterator = new \RecursiveIteratorIterator( - new \RecursiveDirectoryIterator($cachePath), - \RecursiveIteratorIterator::CHILD_FIRST - ); - - foreach ($cacheIterator as $cacheFile) { - $basename = $cacheFile->getBasename(); - $excluded = [ - '.' => true, - '..' 
=> true, - '.gitkeep' => true, - ]; - if (isset($excluded[$basename])) { + foreach (scandir($this->config['path']) as $filename) { + $cacheFile = $this->config['path'] . $filename; + $excluded = ['.' => true, '..' => true, '.gitkeep' => true]; + if (isset($excluded[$filename]) || !is_file($cacheFile)) { continue; - } elseif ($cacheFile->isFile()) { - $filepath = $cacheFile->getPathname(); - if (filemtime($filepath) < time() - $timeout) { - // todo: sometimes this file doesn't exists - unlink($filepath); - } } - } - } - - public function setScope(string $scope): void - { - $this->scope = $this->config['path'] . trim($scope, " \t\n\r\0\x0B\\\/") . '/'; - } - - public function setKey(array $key): void - { - $this->key = json_encode($key); - } - - private function getScope() - { - if (is_null($this->scope)) { - throw new \Exception('Call "setScope" first!'); - } - - if (!is_dir($this->scope)) { - if (mkdir($this->scope, 0755, true) !== true) { - throw new \Exception('mkdir: Unable to create file cache folder'); + $item = unserialize(file_get_contents($cacheFile)); + if ($item === false) { + unlink($cacheFile); + continue; } + $expiration = $item['expiration']; + if ($expiration === 0 || $expiration > time()) { + continue; + } + unlink($cacheFile); } - - return $this->scope; } - private function getCacheFile() + private function createCacheFile(string $key): string { - return $this->getScope() . $this->getCacheName(); + return $this->config['path'] . hash('md5', $key) . '.cache'; } - private function getCacheName() + public function getConfig() { - if (is_null($this->key)) { - throw new \Exception('Call "setKey" first!'); - } - - return hash('md5', $this->key) . 
'.cache'; + return $this->config; } } diff --git a/caches/MemcachedCache.php b/caches/MemcachedCache.php index dcb572c7..78035435 100644 --- a/caches/MemcachedCache.php +++ b/caches/MemcachedCache.php @@ -1,70 +1,36 @@ conn = new \Memcached(); + // This call does not actually connect to server yet + if (!$this->conn->addServer($host, $port)) { + throw new \Exception('Unable to add memcached server'); } - - $section = 'MemcachedCache'; - $host = Configuration::getConfig($section, 'host'); - $port = Configuration::getConfig($section, 'port'); - - if (empty($host) && empty($port)) { - throw new \Exception('Configuration for ' . $section . ' missing.'); - } - if (empty($host)) { - throw new \Exception('"host" param is not set for ' . $section); - } - if (empty($port)) { - throw new \Exception('"port" param is not set for ' . $section); - } - if (!ctype_digit($port)) { - throw new \Exception('"port" param is invalid for ' . $section); - } - - $port = intval($port); - - if ($port < 1 || $port > 65535) { - throw new \Exception('"port" param is invalid for ' . $section); - } - - $conn = new \Memcached(); - $conn->addServer($host, $port) or returnServerError('Could not connect to memcached server'); - $this->conn = $conn; } - public function loadData(int $timeout = 86400) + public function get(string $key, $default = null) { - $value = $this->conn->get($this->getCacheKey()); + $value = $this->conn->get($key); if ($value === false) { - return null; + return $default; } - if (time() - $timeout < $value['time']) { - return $value['data']; - } - return null; + return $value; } - public function saveData($data): void + public function set(string $key, $value, $ttl = null): void { - $value = [ - 'data' => $data, - 'time' => time(), - ]; - $result = $this->conn->set($this->getCacheKey(), $value, $this->expiration); + $expiration = $ttl === null ? 
0 : time() + $ttl; + $result = $this->conn->set($key, $value, $expiration); if ($result === false) { Logger::warning('Failed to store an item in memcached', [ - 'scope' => $this->scope, - 'key' => $this->key, - 'expiration' => $this->expiration, + 'key' => $key, 'code' => $this->conn->getLastErrorCode(), 'message' => $this->conn->getLastErrorMessage(), 'number' => $this->conn->getLastErrorErrno(), @@ -73,38 +39,18 @@ class MemcachedCache implements CacheInterface } } - public function getTime(): ?int + public function delete(string $key): void { - $value = $this->conn->get($this->getCacheKey()); - if ($value === false) { - return null; - } - return $value['time']; + $this->conn->delete($key); } - public function purgeCache(int $timeout = 86400): void + public function clear(): void { - // Note: does not purges cache right now - // Just sets cache expiration and leave cache purging for memcached itself - $this->expiration = $timeout; + $this->conn->flush(); } - public function setScope(string $scope): void + public function prune(): void { - $this->scope = $scope; - } - - public function setKey(array $key): void - { - $this->key = json_encode($key); - } - - private function getCacheKey() - { - if (is_null($this->key)) { - throw new \Exception('Call "setKey" first!'); - } - - return 'rss_bridge_cache_' . hash('md5', $this->scope . $this->key . 
'A'); + // memcached manages pruning on its own } } diff --git a/caches/NullCache.php b/caches/NullCache.php index fe43fe06..2549b117 100644 --- a/caches/NullCache.php +++ b/caches/NullCache.php @@ -4,28 +4,24 @@ declare(strict_types=1); class NullCache implements CacheInterface { - public function setScope(string $scope): void + public function get(string $key, $default = null) + { + return $default; + } + + public function set(string $key, $value, int $ttl = null): void { } - public function setKey(array $key): void + public function delete(string $key): void { } - public function loadData(int $timeout = 86400) + public function clear(): void { } - public function saveData($data): void - { - } - - public function getTime(): ?int - { - return null; - } - - public function purgeCache(int $timeout = 86400): void + public function prune(): void { } } diff --git a/caches/SQLiteCache.php b/caches/SQLiteCache.php index 92235862..beb33e88 100644 --- a/caches/SQLiteCache.php +++ b/caches/SQLiteCache.php @@ -1,10 +1,10 @@ db->exec("CREATE TABLE storage ('key' BLOB PRIMARY KEY, 'value' BLOB, 'updated' INTEGER)"); } $this->db->busyTimeout($config['timeout']); + // https://www.sqlite.org/pragma.html#pragma_journal_mode + $this->db->exec('PRAGMA journal_mode = wal'); + // https://www.sqlite.org/pragma.html#pragma_synchronous + $this->db->exec('PRAGMA synchronous = NORMAL'); } - public function loadData(int $timeout = 86400) + public function get(string $key, $default = null) { + $cacheKey = $this->createCacheKey($key); $stmt = $this->db->prepare('SELECT value, updated FROM storage WHERE key = :key'); - $stmt->bindValue(':key', $this->getCacheKey()); + $stmt->bindValue(':key', $cacheKey); $result = $stmt->execute(); if (!$result) { - return null; + return $default; } $row = $result->fetchArray(\SQLITE3_ASSOC); if ($row === false) { - return null; + return $default; } - $value = $row['value']; - $modificationTime = $row['updated']; - if (time() - $timeout < $modificationTime) { 
- $data = unserialize($value); - if ($data === false) { - Logger::error(sprintf("Failed to unserialize: '%s'", mb_substr($value, 0, 100))); - return null; + $expiration = $row['updated']; + if ($expiration === 0 || $expiration > time()) { + $blob = $row['value']; + $value = unserialize($blob); + if ($value === false) { + Logger::error(sprintf("Failed to unserialize: '%s'", mb_substr($blob, 0, 100))); + // delete? + return $default; } - return $data; + return $value; } - // It's a good idea to delete expired cache items. - // However I'm seeing lots of SQLITE_BUSY errors so commented out for now - // $stmt = $this->db->prepare('DELETE FROM storage WHERE key = :key'); - // $stmt->bindValue(':key', $this->getCacheKey()); - // $stmt->execute(); - return null; + // delete? + return $default; } - - public function saveData($data): void + public function set(string $key, $value, int $ttl = null): void { - $blob = serialize($data); - + $cacheKey = $this->createCacheKey($key); + $blob = serialize($value); + $expiration = $ttl === null ? 0 : time() + $ttl; $stmt = $this->db->prepare('INSERT OR REPLACE INTO storage (key, value, updated) VALUES (:key, :value, :updated)'); - $stmt->bindValue(':key', $this->getCacheKey()); + $stmt->bindValue(':key', $cacheKey); $stmt->bindValue(':value', $blob, \SQLITE3_BLOB); - $stmt->bindValue(':updated', time()); - $stmt->execute(); - } - - public function getTime(): ?int - { - $stmt = $this->db->prepare('SELECT updated FROM storage WHERE key = :key'); - $stmt->bindValue(':key', $this->getCacheKey()); + $stmt->bindValue(':updated', $expiration); $result = $stmt->execute(); - if ($result) { - $row = $result->fetchArray(\SQLITE3_ASSOC); - if ($row !== false) { - return $row['updated']; - } - } - return null; + // Unclear whether we should $result->finalize(); here? 
} - public function purgeCache(int $timeout = 86400): void + public function delete(string $key): void + { + $key = $this->createCacheKey($key); + $stmt = $this->db->prepare('DELETE FROM storage WHERE key = :key'); + $stmt->bindValue(':key', $key); + $result = $stmt->execute(); + } + + public function prune(): void { if (!$this->config['enable_purge']) { return; } - $stmt = $this->db->prepare('DELETE FROM storage WHERE updated < :expired'); - $stmt->bindValue(':expired', time() - $timeout); - $stmt->execute(); + $stmt = $this->db->prepare('DELETE FROM storage WHERE updated <= :now'); + $stmt->bindValue(':now', time()); + $result = $stmt->execute(); } - public function setScope(string $scope): void + public function clear(): void { - $this->scope = $scope; + $this->db->query('DELETE FROM storage'); } - public function setKey(array $key): void + private function createCacheKey($key) { - $this->key = json_encode($key); - } - - private function getCacheKey() - { - return hash('sha1', $this->scope . $this->key, true); + return hash('sha1', $key, true); } } diff --git a/config.default.ini.php b/config.default.ini.php index d0c508f4..52786aef 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -55,7 +55,7 @@ max_filesize = 20 [cache] -; Cache type: file, sqlite, memcached, null +; Cache type: file, sqlite, memcached, array, null type = "file" ; Allow users to specify custom timeout for specific requests. 
diff --git a/contrib/prepare_release/fetch_contributors.php b/contrib/prepare_release/fetch_contributors.php index cfe2c5b2..dd99229f 100644 --- a/contrib/prepare_release/fetch_contributors.php +++ b/contrib/prepare_release/fetch_contributors.php @@ -15,14 +15,17 @@ while ($next) { /* Collect all contributors */ 'User-Agent' => 'RSS-Bridge', ]; $httpClient = new CurlHttpClient(); - $result = $httpClient->request($url, ['headers' => $headers]); + $response = $httpClient->request($url, ['headers' => $headers]); - foreach (json_decode($result['body']) as $contributor) { + $json = $response->getBody(); + $json_decode = Json::decode($json, false); + foreach ($json_decode as $contributor) { $contributors[] = $contributor; } // Extract links to "next", "last", etc... - $links = explode(',', $result['headers']['link'][0]); + $link1 = $response->getHeader('link'); + $links = explode(',', $link1); $next = false; // Check if there is a link with 'rel="next"' diff --git a/docs/06_Helper_functions/index.md b/docs/06_Helper_functions/index.md index 2f0c513c..31a13953 100644 --- a/docs/06_Helper_functions/index.md +++ b/docs/06_Helper_functions/index.md @@ -5,10 +5,12 @@ The `getInput` function is used to receive a value for a parameter, specified in $this->getInput('your input name here'); ``` -`getInput` will either return the value for your parameter or `null` if the parameter is unknown or not specified. +`getInput` will either return the value for your parameter +or `null` if the parameter is unknown or not specified. # getKey -The `getKey` function is used to receive the key name to a selected list value given the name of the list, specified in `const PARAMETERS` +The `getKey` function is used to receive the key name to a selected list +value given the name of the list, specified in `const PARAMETERS` Is able to work with multidimensional list arrays. 
```PHP @@ -34,7 +36,8 @@ $this->getKey('country'); // if the selected value was "ve", this function will return "Venezuela" ``` -`getKey` will either return the key name for your parameter or `null` if the parameter is unknown or not specified. +`getKey` will either return the key name for your parameter or `null` if the parameter +is unknown or not specified. # getContents The `getContents` function uses [cURL](https://secure.php.net/manual/en/book.curl.php) to acquire data from the specified URI while respecting the various settings defined at a global level by RSS-Bridge (i.e., proxy host, user agent, etc.). This function accepts a few parameters: @@ -53,33 +56,29 @@ $html = getContents($url, $header, $opts); ``` # getSimpleHTMLDOM -The `getSimpleHTMLDOM` function is a wrapper for the [simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design. +The `getSimpleHTMLDOM` function is a wrapper for the +[simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design. ```PHP $html = getSimpleHTMLDOM('your URI'); ``` # getSimpleHTMLDOMCached -The `getSimpleHTMLDOMCached` function does the same as the [`getSimpleHTMLDOM`](#getsimplehtmldom) function, except that the content received for the given URI is stored in a cache and loaded from cache on the next request if the specified cache duration was not reached. Use this function for data that is very unlikely to change between consecutive requests to **RSS-Bridge**. This function allows to specify the cache duration with the second parameter (default is 24 hours / 86400 seconds). 
+The `getSimpleHTMLDOMCached` function does the same as the +[`getSimpleHTMLDOM`](#getsimplehtmldom) function, +except that the content received for the given URI is stored in a cache +and loaded from cache on the next request if the specified cache duration +was not reached. + +Use this function for data that is very unlikely to change between consecutive requests to **RSS-Bridge**. +This function allows to specify the cache duration with the second parameter. ```PHP $html = getSimpleHTMLDOMCached('your URI', 86400); // Duration 24h ``` -**Notice:** Due to the current implementation a value greater than 86400 seconds (24 hours) will not work as the cache is purged every 24 hours automatically. - -# returnError -**Notice:** Whenever possible make use of [`returnClientError`](#returnclienterror) or [`returnServerError`](#returnservererror) - -The `returnError` function aborts execution of the current bridge and returns the given error message with the provided error number: - -```PHP -returnError('Your error message', 404); -``` - -Check the [list of error codes](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes) for applicable error numbers. - # returnClientError -The `returnClientError` function aborts execution of the current bridge and returns the given error message with error code **400**: +The `returnClientError` function aborts execution of the current bridge +and returns the given error message with error code **400**: ```PHP returnClientError('Your error message') @@ -94,10 +93,12 @@ The `returnServerError` function aborts execution of the current bridge and retu returnServerError('Your error message') ``` -Use this function when a problem occurs that has nothing to do with the parameters provided by the user. (like: Host service gone missing, empty data received, etc...) +Use this function when a problem occurs that has nothing to do with the parameters provided by the user. +(like: Host service gone missing, empty data received, etc...) 
# defaultLinkTo -Automatically replaces any relative URL in a given string or DOM object (i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL. +Automatically replaces any relative URL in a given string or DOM object +(i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL. ```php defaultLinkTo ( mixed $content, string $server ) : object diff --git a/docs/07_Cache_API/02_CacheInterface.md b/docs/07_Cache_API/02_CacheInterface.md index 61127a0d..3e71237d 100644 --- a/docs/07_Cache_API/02_CacheInterface.md +++ b/docs/07_Cache_API/02_CacheInterface.md @@ -3,16 +3,14 @@ See `CacheInterface`. ```php interface CacheInterface { - public function setScope(string $scope): void; + public function get(string $key, $default = null); - public function setKey(array $key): void; + public function set(string $key, $value, int $ttl = null): void; - public function loadData(); + public function delete(string $key): void; - public function saveData($data): void; + public function clear(): void; - public function getTime(): ?int; - - public function purgeCache(int $seconds): void; + public function prune(): void; } -``` \ No newline at end of file +``` diff --git a/docs/index.md b/docs/index.md index 71fa9f37..c370cb1b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,8 @@ -**RSS-Bridge** is free and open source software for generating Atom or RSS feeds from websites which don't have one. It is written in PHP and intended to run on a Web server. See the [Screenshots](01_General/04_Screenshots.md) for a quick introduction to **RSS-Bridge** +RSS-Bridge is a web application. + +It generates web feeds for websites that don't have one. + +Officially hosted instance: https://rss-bridge.org/bridge01/ - You want to know more about **RSS-Bridge**? Check out our **[project goals](01_General/01_Project-goals.md)**. 
diff --git a/index.php b/index.php index 538f1c6e..9181c0b0 100644 --- a/index.php +++ b/index.php @@ -1,5 +1,9 @@ setScope($this->getShortName()); - $cache->setKey([$key]); - return $cache->loadData($timeout); + $cacheKey = $this->getShortName() . '_' . $key; + return $cache->get($cacheKey); } /** @@ -426,12 +428,11 @@ abstract class BridgeAbstract implements BridgeInterface * * @param mixed $value Value to cache */ - protected function saveCacheValue(string $key, $value) + protected function saveCacheValue(string $key, $value, $ttl = 86400) { $cache = RssBridge::getCache(); - $cache->setScope($this->getShortName()); - $cache->setKey([$key]); - $cache->saveData($value); + $cacheKey = $this->getShortName() . '_' . $key; + $cache->set($cacheKey, $value, $ttl); } public function getShortName(): string diff --git a/lib/BridgeInterface.php b/lib/BridgeInterface.php index 977ad7f6..63bc7b70 100644 --- a/lib/BridgeInterface.php +++ b/lib/BridgeInterface.php @@ -57,6 +57,8 @@ interface BridgeInterface { /** * Collects data from the site + * + * @return void */ public function collectData(); diff --git a/lib/CacheFactory.php b/lib/CacheFactory.php index 78a0e83e..3f076d83 100644 --- a/lib/CacheFactory.php +++ b/lib/CacheFactory.php @@ -72,7 +72,29 @@ class CacheFactory 'enable_purge' => Configuration::getConfig('SQLiteCache', 'enable_purge'), ]); case MemcachedCache::class: - return new MemcachedCache(); + if (!extension_loaded('memcached')) { + throw new \Exception('"memcached" extension not loaded. Please check "php.ini"'); + } + $section = 'MemcachedCache'; + $host = Configuration::getConfig($section, 'host'); + $port = Configuration::getConfig($section, 'port'); + if (empty($host) && empty($port)) { + throw new \Exception('Configuration for ' . $section . ' missing.'); + } + if (empty($host)) { + throw new \Exception('"host" param is not set for ' . $section); + } + if (empty($port)) { + throw new \Exception('"port" param is not set for ' . 
$section); + } + if (!ctype_digit($port)) { + throw new \Exception('"port" param is invalid for ' . $section); + } + $port = intval($port); + if ($port < 1 || $port > 65535) { + throw new \Exception('"port" param is invalid for ' . $section); + } + return new MemcachedCache($host, $port); default: if (!file_exists(PATH_LIB_CACHES . $className . '.php')) { throw new \Exception('Unable to find the cache file'); diff --git a/lib/CacheInterface.php b/lib/CacheInterface.php index 85aa830f..0009a55c 100644 --- a/lib/CacheInterface.php +++ b/lib/CacheInterface.php @@ -2,15 +2,13 @@ interface CacheInterface { - public function setScope(string $scope): void; + public function get(string $key, $default = null); - public function setKey(array $key): void; + public function set(string $key, $value, int $ttl = null): void; - public function loadData(int $timeout = 86400); + public function delete(string $key): void; - public function saveData($data): void; + public function clear(): void; - public function getTime(): ?int; - - public function purgeCache(int $timeout = 86400): void; + public function prune(): void; } diff --git a/lib/Configuration.php b/lib/Configuration.php index f5615009..7ef97fa7 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -37,10 +37,6 @@ final class Configuration */ public static function verifyInstallation() { - if (version_compare(\PHP_VERSION, '7.4.0') === -1) { - throw new \Exception('RSS-Bridge requires at least PHP version 7.4.0!'); - } - $errors = []; // OpenSSL: https://www.php.net/manual/en/book.openssl.php @@ -211,6 +207,9 @@ final class Configuration if (!is_string(self::getConfig('error', 'output'))) { self::throwConfigError('error', 'output', 'Is not a valid String'); } + if (!in_array(self::getConfig('error', 'output'), ['feed', 'http', 'none'])) { + self::throwConfigError('error', 'output', 'Invalid output'); + } if ( !is_numeric(self::getConfig('error', 'report_limit')) diff --git a/lib/FeedExpander.php 
b/lib/FeedExpander.php index c91586d7..be467336 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -100,8 +100,8 @@ abstract class FeedExpander extends BridgeAbstract '*/*', ]; $httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)]; - $content = getContents($url, $httpHeaders); - if ($content === '') { + $xml = getContents($url, $httpHeaders); + if ($xml === '') { throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10); } // Maybe move this call earlier up the stack frames @@ -109,7 +109,7 @@ abstract class FeedExpander extends BridgeAbstract libxml_use_internal_errors(true); // Consider replacing libxml with https://www.php.net/domdocument // Intentionally not using the silencing operator (@) because it has no effect here - $rssContent = simplexml_load_string(trim($content)); + $rssContent = simplexml_load_string(trim($xml)); if ($rssContent === false) { $xmlErrors = libxml_get_errors(); foreach ($xmlErrors as $xmlError) { diff --git a/lib/FormatInterface.php b/lib/FormatInterface.php index c0355804..49e36933 100644 --- a/lib/FormatInterface.php +++ b/lib/FormatInterface.php @@ -28,15 +28,7 @@ interface FormatInterface */ public function stringify(); - /** - * Set items - * - * @param array $bridges The items - * @return self The format object - * - * @todo Rename parameter `$bridges` to `$items` - */ - public function setItems(array $bridges); + public function setItems(array $items); /** * Return items diff --git a/lib/Logger.php b/lib/Logger.php index 5423f62c..073fedee 100644 --- a/lib/Logger.php +++ b/lib/Logger.php @@ -66,13 +66,24 @@ final class Logger } } } - // Intentionally not sanitizing $message + + if ($context) { + try { + $context = Json::encode($context); + } catch (\JsonException $e) { + $context['message'] = null; + $context = Json::encode($context); + } + } else { + $context = ''; + } $text = sprintf( "[%s] rssbridge.%s %s %s\n", now()->format('Y-m-d H:i:s'), $level, + // 
Intentionally not sanitizing $message $message, - $context ? Json::encode($context) : '' + $context ); // Log to stderr/stdout whatever that is @@ -81,6 +92,6 @@ final class Logger // Log to file // todo: extract to log handler - // file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND | LOCK_EX); + //$bytes = file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND | LOCK_EX); } } diff --git a/lib/RssBridge.php b/lib/RssBridge.php index 8969dc54..1c6ce464 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -5,25 +5,7 @@ final class RssBridge private static HttpClient $httpClient; private static CacheInterface $cache; - public function main(array $argv = []) - { - if ($argv) { - parse_str(implode('&', array_slice($argv, 1)), $cliArgs); - $request = $cliArgs; - } else { - $request = array_merge($_GET, $_POST); - } - - try { - $this->run($request); - } catch (\Throwable $e) { - Logger::error(sprintf('Exception in RssBridge::main(): %s', create_sane_exception_message($e)), ['e' => $e]); - http_response_code(500); - print render(__DIR__ . '/../templates/error.html.php', ['e' => $e]); - } - } - - private function run($request): void + public function __construct() { Configuration::verifyInstallation(); @@ -33,6 +15,13 @@ final class RssBridge } Configuration::loadConfiguration($customConfig, getenv()); + set_exception_handler(function (\Throwable $e) { + Logger::error('Uncaught Exception', ['e' => $e]); + http_response_code(500); + print render(__DIR__ . '/../templates/error.html.php', ['e' => $e]); + exit(1); + }); + set_error_handler(function ($code, $message, $file, $line) { if ((error_reporting() & $code) === 0) { return false; @@ -45,7 +34,6 @@ final class RssBridge ); Logger::warning($text); if (Debug::isEnabled()) { - // todo: extract to log handler print sprintf("
%s
\n", e($text)); } }); @@ -72,38 +60,58 @@ final class RssBridge // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); date_default_timezone_set(Configuration::getConfig('system', 'timezone')); - $cacheFactory = new CacheFactory(); - self::$httpClient = new CurlHttpClient(); - self::$cache = $cacheFactory->create(); + + $cacheFactory = new CacheFactory(); + if (Debug::isEnabled()) { + self::$cache = $cacheFactory->create('array'); + } else { + self::$cache = $cacheFactory->create(); + } if (Configuration::getConfig('authentication', 'enable')) { $authenticationMiddleware = new AuthenticationMiddleware(); $authenticationMiddleware(); } + } - foreach ($request as $key => $value) { - if (!is_string($value)) { - throw new \Exception("Query parameter \"$key\" is not a string."); + public function main(array $argv = []): void + { + if ($argv) { + parse_str(implode('&', array_slice($argv, 1)), $cliArgs); + $request = $cliArgs; + } else { + $request = array_merge($_GET, $_POST); + } + + try { + foreach ($request as $key => $value) { + if (!is_string($value)) { + throw new \Exception("Query parameter \"$key\" is not a string."); + } } - } - $actionName = $request['action'] ?? 'Frontpage'; - $actionName = strtolower($actionName) . 'Action'; - $actionName = implode(array_map('ucfirst', explode('-', $actionName))); + $actionName = $request['action'] ?? 'Frontpage'; + $actionName = strtolower($actionName) . 'Action'; + $actionName = implode(array_map('ucfirst', explode('-', $actionName))); - $filePath = __DIR__ . '/../actions/' . $actionName . '.php'; - if (!file_exists($filePath)) { - throw new \Exception(sprintf('Invalid action: %s', $actionName)); - } - $className = '\\' . $actionName; - $action = new $className(); + $filePath = __DIR__ . '/../actions/' . $actionName . '.php'; + if (!file_exists($filePath)) { + throw new \Exception('Invalid action', 400); + } + $className = '\\' . 
$actionName; + $action = new $className(); - $response = $action->execute($request); - if (is_string($response)) { - print $response; - } elseif ($response instanceof Response) { - $response->send(); + $response = $action->execute($request); + if (is_string($response)) { + print $response; + } elseif ($response instanceof Response) { + $response->send(); + } + } catch (\Throwable $e) { + Logger::error('Exception in RssBridge::main()', ['e' => $e]); + http_response_code(500); + print render(__DIR__ . '/../templates/error.html.php', ['e' => $e]); } } @@ -114,6 +122,12 @@ final class RssBridge public static function getCache(): CacheInterface { - return self::$cache; + return self::$cache ?? new NullCache(); + } + + public function clearCache() + { + $cache = self::getCache(); + $cache->clear(); } } diff --git a/lib/TwitterClient.php b/lib/TwitterClient.php index 20f21482..f71e842c 100644 --- a/lib/TwitterClient.php +++ b/lib/TwitterClient.php @@ -12,11 +12,9 @@ class TwitterClient { $this->cache = $cache; - $cache->setScope('twitter'); - $cache->setKey(['cache']); - $cache->purgeCache(60 * 60 * 3); + $data = $this->cache->get('twitter') ?? []; + $this->data = $data; - $this->data = $this->cache->loadData() ?? 
[]; $this->authorization = 'AAAAAAAAAAAAAAAAAAAAAGHtAgAAAAAA%2Bx7ILXNILCqkSGIzy6faIHZ9s3Q%3DQy97w6SIrzE7lQwPJEYQBsArEE2fC25caFwRBvAGi456G09vGR'; $this->tw_consumer_key = '3nVuSoBZnx6U4vzUxf5w'; $this->tw_consumer_secret = 'Bcs59EFbbsdF6Sl9Ng71smgStWEGwXXKSjYvPVt7qys'; @@ -273,9 +271,7 @@ class TwitterClient $guest_token = json_decode($response)->guest_token; $this->data['guest_token'] = $guest_token; - $this->cache->setScope('twitter'); - $this->cache->setKey(['cache']); - $this->cache->saveData($this->data); + $this->cache->set('twitter', $this->data); } private function fetchUserInfoByScreenName(string $screenName) @@ -299,9 +295,7 @@ class TwitterClient $userInfo = $response->data->user; $this->data[$screenName] = $userInfo; - $this->cache->setScope('twitter'); - $this->cache->setKey(['cache']); - $this->cache->saveData($this->data); + $this->cache->set('twitter', $this->data); return $userInfo; } @@ -434,9 +428,7 @@ class TwitterClient $listInfo = $response->data->user_by_screen_name->list; $this->data[$screenName . '-' . $listSlug] = $listInfo; - $this->cache->setScope('twitter'); - $this->cache->setKey(['cache']); - $this->cache->saveData($this->data); + $this->cache->set('twitter', $this->data); return $listInfo; } diff --git a/lib/bootstrap.php b/lib/bootstrap.php index e05dd94a..ca6cecdb 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -39,10 +39,10 @@ const MAX_FILE_SIZE = 10000000; // Files $files = [ __DIR__ . '/../lib/html.php', - __DIR__ . '/../lib/error.php', __DIR__ . '/../lib/contents.php', __DIR__ . '/../lib/php8backports.php', __DIR__ . '/../lib/utils.php', + __DIR__ . '/../lib/http.php', // Vendor __DIR__ . '/../vendor/parsedown/Parsedown.php', __DIR__ . 
'/../vendor/php-urljoin/src/urljoin.php', diff --git a/lib/contents.php b/lib/contents.php index c842ccbc..c1847758 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -1,101 +1,11 @@ 'Continue', - '101' => 'Switching Protocols', - '200' => 'OK', - '201' => 'Created', - '202' => 'Accepted', - '203' => 'Non-Authoritative Information', - '204' => 'No Content', - '205' => 'Reset Content', - '206' => 'Partial Content', - '300' => 'Multiple Choices', - '301' => 'Moved Permanently', - '302' => 'Found', - '303' => 'See Other', - '304' => 'Not Modified', - '305' => 'Use Proxy', - '400' => 'Bad Request', - '401' => 'Unauthorized', - '402' => 'Payment Required', - '403' => 'Forbidden', - '404' => 'Not Found', - '405' => 'Method Not Allowed', - '406' => 'Not Acceptable', - '407' => 'Proxy Authentication Required', - '408' => 'Request Timeout', - '409' => 'Conflict', - '410' => 'Gone', - '411' => 'Length Required', - '412' => 'Precondition Failed', - '413' => 'Request Entity Too Large', - '414' => 'Request-URI Too Long', - '415' => 'Unsupported Media Type', - '416' => 'Requested Range Not Satisfiable', - '417' => 'Expectation Failed', - '429' => 'Too Many Requests', - '500' => 'Internal Server Error', - '501' => 'Not Implemented', - '502' => 'Bad Gateway', - '503' => 'Service Unavailable', - '504' => 'Gateway Timeout', - '505' => 'HTTP Version Not Supported' - ]; - private string $body; - private int $code; - private array $headers; - - public function __construct( - string $body = '', - int $code = 200, - array $headers = [] - ) { - $this->body = $body; - $this->code = $code; - $this->headers = $headers; - } - - public function getBody() - { - return $this->body; - } - - public function getCode() - { - return $this->code; - } - - public function getHeaders() - { - return $this->headers; - } - - public function send(): void - { - http_response_code($this->code); - foreach ($this->headers as $name => $value) { - header(sprintf('%s: %s', $name, $value)); - } - print 
$this->body; - } -} - /** * Fetch data from an http url * * @param array $httpHeaders E.g. ['Content-type: text/plain'] * @param array $curlOptions Associative array e.g. [CURLOPT_MAXREDIRS => 3] - * @param bool $returnFull Whether to return an array: - * [ - * 'code' => int, - * 'header' => array, - * 'content' => string, - * 'status_lines' => array, - * ] - + * @param bool $returnFull Whether to return an array: ['code' => int, 'headers' => array, 'content' => string] * @return string|array */ function getContents( @@ -142,30 +52,35 @@ function getContents( } $cache = RssBridge::getCache(); - $cache->setScope('server'); - $cache->setKey([$url]); + $cacheKey = 'server_' . $url; - if (!Debug::isEnabled() && $cache->getTime() && $cache->loadData(86400 * 7)) { - $config['if_not_modified_since'] = $cache->getTime(); + /** @var Response $cachedResponse */ + $cachedResponse = $cache->get($cacheKey); + if ($cachedResponse) { + // considering popping + $cachedLastModified = $cachedResponse->getHeader('last-modified'); + if ($cachedLastModified) { + $cachedLastModified = new \DateTimeImmutable($cachedLastModified); + $config['if_not_modified_since'] = $cachedLastModified->getTimestamp(); + } } $response = $httpClient->request($url, $config); - switch ($response['code']) { + switch ($response->getCode()) { case 200: case 201: case 202: - if (isset($response['headers']['cache-control'])) { - $cachecontrol = $response['headers']['cache-control']; - $lastValue = array_pop($cachecontrol); - $directives = explode(',', $lastValue); + $cacheControl = $response->getHeader('cache-control'); + if ($cacheControl) { + $directives = explode(',', $cacheControl); $directives = array_map('trim', $directives); if (in_array('no-cache', $directives) || in_array('no-store', $directives)) { // Don't cache as instructed by the server break; } } - $cache->saveData($response['body']); + $cache->set($cacheKey, $response, 86400 * 10); break; case 301: case 302: @@ -174,16 +89,16 @@ function 
getContents( break; case 304: // Not Modified - $response['body'] = $cache->loadData(86400 * 7); + $response = $response->withBody($cachedResponse->getBody()); break; default: $exceptionMessage = sprintf( '%s resulted in %s %s %s', $url, - $response['code'], - Response::STATUS_CODES[$response['code']] ?? '', + $response->getCode(), + $response->getStatusLine(), // If debug, include a part of the response body in the exception message - Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '', + Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', ); // The following code must be extracted if it grows too much @@ -194,141 +109,21 @@ function getContents( 'Security | Glassdoor', ]; foreach ($cloudflareTitles as $cloudflareTitle) { - if (str_contains($response['body'], $cloudflareTitle)) { - throw new CloudFlareException($exceptionMessage, $response['code']); + if (str_contains($response->getBody(), $cloudflareTitle)) { + throw new CloudFlareException($exceptionMessage, $response->getCode()); } } - throw new HttpException(trim($exceptionMessage), $response['code']); + throw new HttpException(trim($exceptionMessage), $response->getCode()); } if ($returnFull === true) { - // For legacy reasons, use content instead of body - $response['content'] = $response['body']; - unset($response['body']); - return $response; - } - return $response['body']; -} - -interface HttpClient -{ - public function request(string $url, array $config = []): array; -} - -final class CurlHttpClient implements HttpClient -{ - public function request(string $url, array $config = []): array - { - $defaults = [ - 'useragent' => null, - 'timeout' => 5, - 'headers' => [], - 'proxy' => null, - 'curl_options' => [], - 'if_not_modified_since' => null, - 'retries' => 3, - 'max_filesize' => null, - 'max_redirections' => 5, - ]; - $config = array_merge($defaults, $config); - - $ch = curl_init($url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, 
CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']); - curl_setopt($ch, CURLOPT_HEADER, false); - $httpHeaders = []; - foreach ($config['headers'] as $name => $value) { - $httpHeaders[] = sprintf('%s: %s', $name, $value); - } - curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders); - if ($config['useragent']) { - curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); - } - curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); - curl_setopt($ch, CURLOPT_ENCODING, ''); - curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); - - if ($config['max_filesize']) { - // This option inspects the Content-Length header - curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']); - curl_setopt($ch, CURLOPT_NOPROGRESS, false); - // This progress function will monitor responses who omit the Content-Length header - curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) { - if ($downloaded > $config['max_filesize']) { - // Return a non-zero value to abort the transfer - return -1; - } - return 0; - }); - } - - if ($config['proxy']) { - curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); - } - if (curl_setopt_array($ch, $config['curl_options']) === false) { - throw new \Exception('Tried to set an illegal curl option'); - } - - if ($config['if_not_modified_since']) { - curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); - curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); - } - - $responseStatusLines = []; - $responseHeaders = []; - curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { - $len = strlen($rawHeader); - if ($rawHeader === "\r\n") { - return $len; - } - if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { - $responseStatusLines[] = $rawHeader; - return $len; - } - $header = explode(':', $rawHeader); - if (count($header) === 1) { - return 
$len; - } - $name = mb_strtolower(trim($header[0])); - $value = trim(implode(':', array_slice($header, 1))); - if (!isset($responseHeaders[$name])) { - $responseHeaders[$name] = []; - } - $responseHeaders[$name][] = $value; - return $len; - }); - - $attempts = 0; - while (true) { - $attempts++; - $data = curl_exec($ch); - if ($data !== false) { - // The network call was successful, so break out of the loop - break; - } - if ($attempts > $config['retries']) { - // Finally give up - $curl_error = curl_error($ch); - $curl_errno = curl_errno($ch); - throw new HttpException(sprintf( - 'cURL error %s: %s (%s) for %s', - $curl_error, - $curl_errno, - 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', - $url - )); - } - } - - $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); return [ - 'code' => $statusCode, - 'status_lines' => $responseStatusLines, - 'headers' => $responseHeaders, - 'body' => $data, + 'code' => $response->getCode(), + 'headers' => $response->getHeaders(), + // For legacy reasons, use 'content' instead of 'body' + 'content' => $response->getBody(), ]; } + return $response->getBody(); } /** @@ -391,7 +186,7 @@ function getSimpleHTMLDOM( * _Notice_: Cached contents are forcefully removed after 24 hours (86400 seconds). * * @param string $url The URL. - * @param int $timeout Cache duration in seconds. + * @param int $ttl Cache duration in seconds. * @param array $header (optional) A list of cURL header. * For more information follow the links below. 
* * https://php.net/manual/en/function.curl-setopt.php @@ -416,7 +211,7 @@ function getSimpleHTMLDOM( */ function getSimpleHTMLDOMCached( $url, - $timeout = 86400, + $ttl = 86400, $header = [], $opts = [], $lowercase = true, @@ -427,14 +222,11 @@ function getSimpleHTMLDOMCached( $defaultSpanText = DEFAULT_SPAN_TEXT ) { $cache = RssBridge::getCache(); - $cache->setScope('pages'); - $cache->setKey([$url]); - $content = $cache->loadData($timeout); - if (!$content || Debug::isEnabled()) { + $cacheKey = 'pages_' . $url; + $content = $cache->get($cacheKey); + if (!$content) { $content = getContents($url, $header ?? [], $opts ?? []); - $cache->setScope('pages'); - $cache->setKey([$url]); - $cache->saveData($content); + $cache->set($cacheKey, $content, $ttl); } return str_get_html( $content, diff --git a/lib/error.php b/lib/error.php deleted file mode 100644 index 4439fb38..00000000 --- a/lib/error.php +++ /dev/null @@ -1,47 +0,0 @@ -<?php - -/** - * This file is part of RSS-Bridge, a PHP project capable of generating RSS and - * Atom feeds for websites that don't have one. - * - * For the full license information, please view the UNLICENSE file distributed - * with this source code. - * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge - */ - -/** - * Throws an exception when called. - * - * @throws \Exception when called - * @param string $message The error message - * @param int $code The HTTP error code - * @link https://en.wikipedia.org/wiki/List_of_HTTP_status_codes List of HTTP - * status codes - */ -function returnError($message, $code) -{ - throw new \Exception($message, $code); -} - -/** - * Returns HTTP Error 400 (Bad Request) when called. - * - * @param string $message The error message - */ -function returnClientError($message) -{ - returnError($message, 400); -} - -/** - * Returns HTTP Error 500 (Internal Server Error) when called. 
- * - * @param string $message The error message - */ -function returnServerError($message) -{ - returnError($message, 500); -} diff --git a/lib/http.php b/lib/http.php new file mode 100644 index 00000000..c5e65e77 --- /dev/null +++ b/lib/http.php @@ -0,0 +1,252 @@ +<?php + +class HttpException extends \Exception +{ +} + +final class CloudFlareException extends HttpException +{ +} + +interface HttpClient +{ + public function request(string $url, array $config = []): Response; +} + +final class CurlHttpClient implements HttpClient +{ + public function request(string $url, array $config = []): Response + { + $defaults = [ + 'useragent' => null, + 'timeout' => 5, + 'headers' => [], + 'proxy' => null, + 'curl_options' => [], + 'if_not_modified_since' => null, + 'retries' => 3, + 'max_filesize' => null, + 'max_redirections' => 5, + ]; + $config = array_merge($defaults, $config); + + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']); + curl_setopt($ch, CURLOPT_HEADER, false); + $httpHeaders = []; + foreach ($config['headers'] as $name => $value) { + $httpHeaders[] = sprintf('%s: %s', $name, $value); + } + curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders); + if ($config['useragent']) { + curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); + } + curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); + curl_setopt($ch, CURLOPT_ENCODING, ''); + curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); + + if ($config['max_filesize']) { + // This option inspects the Content-Length header + curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']); + curl_setopt($ch, CURLOPT_NOPROGRESS, false); + // This progress function will monitor responses who omit the Content-Length header + curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) { + if 
($downloaded > $config['max_filesize']) { + // Return a non-zero value to abort the transfer + return -1; + } + return 0; + }); + } + + if ($config['proxy']) { + curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); + } + if (curl_setopt_array($ch, $config['curl_options']) === false) { + throw new \Exception('Tried to set an illegal curl option'); + } + + if ($config['if_not_modified_since']) { + curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); + curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); + } + + $responseStatusLines = []; + $responseHeaders = []; + curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { + $len = strlen($rawHeader); + if ($rawHeader === "\r\n") { + return $len; + } + if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { + $responseStatusLines[] = trim($rawHeader); + return $len; + } + $header = explode(':', $rawHeader); + if (count($header) === 1) { + return $len; + } + $name = mb_strtolower(trim($header[0])); + $value = trim(implode(':', array_slice($header, 1))); + if (!isset($responseHeaders[$name])) { + $responseHeaders[$name] = []; + } + $responseHeaders[$name][] = $value; + return $len; + }); + + $attempts = 0; + while (true) { + $attempts++; + $data = curl_exec($ch); + if ($data !== false) { + // The network call was successful, so break out of the loop + break; + } + if ($attempts > $config['retries']) { + // Finally give up + $curl_error = curl_error($ch); + $curl_errno = curl_errno($ch); + throw new HttpException(sprintf( + 'cURL error %s: %s (%s) for %s', + $curl_error, + $curl_errno, + 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', + $url + )); + } + } + + $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + return new Response($data, $statusCode, $responseHeaders); + } +} + +final class Response +{ + public const STATUS_CODES = [ + '100' => 'Continue', + '101' => 'Switching Protocols', + '200' => 'OK', + 
'201' => 'Created', + '202' => 'Accepted', + '203' => 'Non-Authoritative Information', + '204' => 'No Content', + '205' => 'Reset Content', + '206' => 'Partial Content', + '300' => 'Multiple Choices', + '301' => 'Moved Permanently', + '302' => 'Found', + '303' => 'See Other', + '304' => 'Not Modified', + '305' => 'Use Proxy', + '400' => 'Bad Request', + '401' => 'Unauthorized', + '402' => 'Payment Required', + '403' => 'Forbidden', + '404' => 'Not Found', + '405' => 'Method Not Allowed', + '406' => 'Not Acceptable', + '407' => 'Proxy Authentication Required', + '408' => 'Request Timeout', + '409' => 'Conflict', + '410' => 'Gone', + '411' => 'Length Required', + '412' => 'Precondition Failed', + '413' => 'Request Entity Too Large', + '414' => 'Request-URI Too Long', + '415' => 'Unsupported Media Type', + '416' => 'Requested Range Not Satisfiable', + '417' => 'Expectation Failed', + '429' => 'Too Many Requests', + '500' => 'Internal Server Error', + '501' => 'Not Implemented', + '502' => 'Bad Gateway', + '503' => 'Service Unavailable', + '504' => 'Gateway Timeout', + '505' => 'HTTP Version Not Supported' + ]; + private string $body; + private int $code; + private array $headers; + + public function __construct( + string $body = '', + int $code = 200, + array $headers = [] + ) { + $this->body = $body; + $this->code = $code; + $this->headers = []; + + foreach ($headers as $name => $value) { + $name = mb_strtolower($name); + if (!isset($this->headers[$name])) { + $this->headers[$name] = []; + } + if (is_string($value)) { + $this->headers[$name][] = $value; + } + if (is_array($value)) { + $this->headers[$name] = $value; + } + } + } + + public function getBody() + { + return $this->body; + } + + public function getCode() + { + return $this->code; + } + + public function getStatusLine(): string + { + return self::STATUS_CODES[$this->code] ?? 
''; + } + + public function getHeaders() + { + return $this->headers; + } + + /** + * @return string[]|string|null + */ + public function getHeader(string $name, bool $all = false) + { + $name = mb_strtolower($name); + $header = $this->headers[$name] ?? null; + if (!$header) { + return null; + } + if ($all) { + return $header; + } + return array_pop($header); + } + + public function withBody(string $body): Response + { + $clone = clone $this; + $clone->body = $body; + return $clone; + } + + public function send(): void + { + http_response_code($this->code); + foreach ($this->headers as $name => $values) { + foreach ($values as $value) { + header(sprintf('%s: %s', $name, $value)); + } + } + print $this->body; + } +} diff --git a/lib/utils.php b/lib/utils.php index 94f928cd..4c58d258 100644 --- a/lib/utils.php +++ b/lib/utils.php @@ -1,18 +1,17 @@ <?php -class HttpException extends \Exception -{ -} - -final class CloudFlareException extends HttpException -{ -} - +// https://github.com/nette/utils/blob/master/src/Utils/Json.php final class Json { - public static function encode($value): string + public static function encode($value, $pretty = true, bool $asciiSafe = false): string { - $flags = JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE; + $flags = JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES; + if (!$asciiSafe) { + $flags = $flags | JSON_UNESCAPED_UNICODE; + } + if ($pretty) { + $flags = $flags | JSON_PRETTY_PRINT; + } return \json_encode($value, $flags); } @@ -237,3 +236,13 @@ function create_random_string(int $bytes = 16): string { return bin2hex(openssl_random_pseudo_bytes($bytes)); } + +function returnClientError($message) +{ + throw new \Exception($message, 400); +} + +function returnServerError($message) +{ + throw new \Exception($message, 500); +} diff --git a/tests/Actions/ListActionTest.php b/tests/Actions/ListActionTest.php index e0625fb3..74a90254 100644 --- a/tests/Actions/ListActionTest.php +++ 
b/tests/Actions/ListActionTest.php @@ -17,7 +17,8 @@ class ListActionTest extends TestCase $action = new \ListAction(); $response = $action->execute([]); $headers = $response->getHeaders(); - $this->assertSame($headers['Content-Type'], 'application/json'); + $contentType = $response->getHeader('content-type'); + $this->assertSame($contentType, 'application/json'); } public function testOutput() diff --git a/tests/CacheTest.php b/tests/CacheTest.php index 9a8ada14..15d03ec1 100644 --- a/tests/CacheTest.php +++ b/tests/CacheTest.php @@ -27,17 +27,13 @@ class CacheTest extends TestCase 'path' => $temporaryFolder, 'enable_purge' => true, ]); - $sut->setScope('scope'); - $sut->purgeCache(-1); - $sut->setKey(['key']); + $sut->clear(); - $this->assertNull($sut->getTime()); - $this->assertNull($sut->loadData()); + $this->assertNull($sut->get('key')); - $sut->saveData('data'); - $this->assertSame('data', $sut->loadData()); - $this->assertIsNumeric($sut->getTime()); - $sut->purgeCache(-1); + $sut->set('key', 'data', 5); + $this->assertSame('data', $sut->get('key')); + $sut->clear(); // Intentionally not deleting the temp folder }