fix: rewrite and improve caching (#3594)

This commit is contained in:
Dag 2023-09-10 21:50:15 +02:00 committed by GitHub
parent a786bbd4e0
commit 4b9f6f7e53
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
45 changed files with 993 additions and 1169 deletions

View file

@ -55,6 +55,10 @@ Alternatively find another
Requires minimum PHP 7.4.
```shell
apt install nginx php-fpm php-mbstring php-simplexml php-curl
```
```shell
cd /var/www
composer create-project -v --no-dev rss-bridge/rss-bridge
@ -334,10 +338,11 @@ This is the feed item structure that bridges are expected to produce.
### Cache backends
* `file`
* `sqlite`
* `memcached`
* `null`
* `File`
* `SQLite`
* `Memcached`
* `Array`
* `Null`
### Licenses

View file

@ -34,7 +34,7 @@ class ConnectivityAction implements ActionInterface
public function execute(array $request)
{
if (!Debug::isEnabled()) {
throw new \Exception('This action is only available in debug mode!');
return new Response('This action is only available in debug mode!');
}
$bridgeName = $request['bridge'] ?? null;
@ -43,7 +43,7 @@ class ConnectivityAction implements ActionInterface
}
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
if (!$bridgeClassName) {
throw new \Exception(sprintf('Bridge not found: %s', $bridgeName));
return new Response('Bridge not found', 404);
}
return $this->reportBridgeConnectivity($bridgeClassName);
}
@ -54,29 +54,25 @@ class ConnectivityAction implements ActionInterface
throw new \Exception('Bridge is not whitelisted!');
}
$retVal = [
'bridge' => $bridgeClassName,
'successful' => false,
'http_code' => 200,
];
$bridge = $this->bridgeFactory->create($bridgeClassName);
$curl_opts = [
CURLOPT_CONNECTTIMEOUT => 5
CURLOPT_CONNECTTIMEOUT => 5,
CURLOPT_FOLLOWLOCATION => true,
];
$result = [
'bridge' => $bridgeClassName,
'successful' => false,
'http_code' => null,
];
try {
$reply = getContents($bridge::URI, [], $curl_opts, true);
if ($reply['code'] === 200) {
$retVal['successful'] = true;
if (strpos(implode('', $reply['status_lines']), '301 Moved Permanently')) {
$retVal['http_code'] = 301;
}
$response = getContents($bridge::URI, [], $curl_opts, true);
$result['http_code'] = $response['code'];
if (in_array($response['code'], [200])) {
$result['successful'] = true;
}
} catch (\Exception $e) {
$retVal['successful'] = false;
}
return new Response(Json::encode($retVal), 200, ['Content-Type' => 'text/json']);
return new Response(Json::encode($result), 200, ['content-type' => 'text/json']);
}
}

View file

@ -45,7 +45,7 @@ class DetectAction implements ActionInterface
$bridgeParams['format'] = $format;
$url = '?action=display&' . http_build_query($bridgeParams);
return new Response('', 301, ['Location' => $url]);
return new Response('', 301, ['location' => $url]);
}
throw new \Exception('No bridge found for given URL: ' . $targetURL);

View file

@ -10,50 +10,41 @@ class DisplayAction implements ActionInterface
return new Response('503 Service Unavailable', 503);
}
$this->cache = RssBridge::getCache();
$this->cache->setScope('http');
$this->cache->setKey($request);
// avg timeout of 20m
$timeout = 60 * 15 + rand(1, 60 * 10);
$cacheKey = 'http_' . json_encode($request);
/** @var Response $cachedResponse */
$cachedResponse = $this->cache->loadData($timeout);
if ($cachedResponse && !Debug::isEnabled()) {
//Logger::info(sprintf('Returning cached (http) response: %s', $cachedResponse->getBody()));
$cachedResponse = $this->cache->get($cacheKey);
if ($cachedResponse) {
$ifModifiedSince = $_SERVER['HTTP_IF_MODIFIED_SINCE'] ?? null;
$lastModified = $cachedResponse->getHeader('last-modified');
if ($ifModifiedSince && $lastModified) {
$lastModified = new \DateTimeImmutable($lastModified);
$lastModifiedTimestamp = $lastModified->getTimestamp();
$modifiedSince = strtotime($ifModifiedSince);
if ($lastModifiedTimestamp <= $modifiedSince) {
$modificationTimeGMT = gmdate('D, d M Y H:i:s ', $lastModifiedTimestamp);
return new Response('', 304, ['last-modified' => $modificationTimeGMT . 'GMT']);
}
}
return $cachedResponse;
}
$response = $this->createResponse($request);
if (in_array($response->getCode(), [429, 503])) {
//Logger::info(sprintf('Storing cached (http) response: %s', $response->getBody()));
$this->cache->setScope('http');
$this->cache->setKey($request);
$this->cache->saveData($response);
}
return $response;
}
private function createResponse(array $request)
{
$bridgeFactory = new BridgeFactory();
$formatFactory = new FormatFactory();
$bridgeName = $request['bridge'] ?? null;
$format = $request['format'] ?? null;
if (!$bridgeName) {
return new Response('Missing bridge param', 400);
}
$bridgeFactory = new BridgeFactory();
$bridgeClassName = $bridgeFactory->createBridgeClassName($bridgeName);
if (!$bridgeClassName) {
throw new \Exception(sprintf('Bridge not found: %s', $bridgeName));
return new Response('Bridge not found', 404);
}
$format = $request['format'] ?? null;
if (!$format) {
throw new \Exception('You must specify a format!');
return new Response('You must specify a format!', 400);
}
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
throw new \Exception('This bridge is not whitelisted');
return new Response('This bridge is not whitelisted', 400);
}
$format = $formatFactory->create($format);
$bridge = $bridgeFactory->create($bridgeClassName);
$bridge->loadConfiguration();
$noproxy = $request['_noproxy'] ?? null;
if (
Configuration::getConfig('proxy', 'url')
@ -64,147 +55,100 @@ class DisplayAction implements ActionInterface
define('NOPROXY', true);
}
$cacheTimeout = $request['_cache_timeout'] ?? null;
if (Configuration::getConfig('cache', 'custom_timeout') && $cacheTimeout) {
$cacheTimeout = (int) $cacheTimeout;
} else {
// At this point the query argument might still be in the url but it won't be used
$cacheTimeout = $bridge->getCacheTimeout();
$bridge = $bridgeFactory->create($bridgeClassName);
$formatFactory = new FormatFactory();
$format = $formatFactory->create($format);
$response = $this->createResponse($request, $bridge, $format);
if ($response->getCode() === 200) {
$ttl = $request['_cache_timeout'] ?? null;
if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) {
$ttl = (int) $ttl;
} else {
$ttl = $bridge->getCacheTimeout();
}
$this->cache->set($cacheKey, $response, $ttl);
}
// Remove parameters that don't concern bridges
$bridge_params = array_diff_key(
$request,
array_fill_keys(
[
'action',
'bridge',
'format',
'_noproxy',
'_cache_timeout',
'_error_time'
],
''
)
);
if (in_array($response->getCode(), [429, 503])) {
$this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10)); // average 20m
}
// Remove parameters that don't concern caches
$cache_params = array_diff_key(
$request,
array_fill_keys(
[
'action',
'format',
'_noproxy',
'_cache_timeout',
'_error_time'
],
''
)
);
$this->cache->setScope('');
$this->cache->setKey($cache_params);
if ($response->getCode() === 500) {
$this->cache->set($cacheKey, $response, 60 * 15);
}
if (rand(1, 100) === 2) {
$this->cache->prune();
}
return $response;
}
private function createResponse(array $request, BridgeInterface $bridge, FormatInterface $format)
{
$items = [];
$infos = [];
$feed = $this->cache->loadData($cacheTimeout);
if ($feed && !Debug::isEnabled()) {
if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) {
$modificationTime = $this->cache->getTime();
// The client wants to know if the feed has changed since its last check
$modifiedSince = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']);
if ($modificationTime <= $modifiedSince) {
$modificationTimeGMT = gmdate('D, d M Y H:i:s ', $modificationTime);
return new Response('', 304, ['Last-Modified' => $modificationTimeGMT . 'GMT']);
try {
$bridge->loadConfiguration();
// Remove parameters that don't concern bridges
$bridgeData = array_diff_key($request, array_fill_keys(['action', 'bridge', 'format', '_noproxy', '_cache_timeout', '_error_time'], ''));
$bridge->setDatas($bridgeData);
$bridge->collectData();
$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = new FeedItem($item);
}
$items = $feedItems;
}
$infos = [
'name' => $bridge->getName(),
'uri' => $bridge->getURI(),
'donationUri' => $bridge->getDonationURI(),
'icon' => $bridge->getIcon()
];
} catch (\Exception $e) {
$errorOutput = Configuration::getConfig('error', 'output');
$reportLimit = Configuration::getConfig('error', 'report_limit');
if ($e instanceof HttpException) {
// Reproduce (and log) these responses regardless of error output and report limit
if ($e->getCode() === 429) {
Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
return new Response('429 Too Many Requests', 429);
}
if ($e->getCode() === 503) {
Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
return new Response('503 Service Unavailable', 503);
}
}
if (isset($feed['items']) && isset($feed['extraInfos'])) {
foreach ($feed['items'] as $item) {
$items[] = new FeedItem($item);
}
$infos = $feed['extraInfos'];
Logger::error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]);
$errorCount = 1;
if ($reportLimit > 1) {
$errorCount = $this->logBridgeError($bridge->getName(), $e->getCode());
}
} else {
try {
$bridge->setDatas($bridge_params);
$bridge->collectData();
$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = new FeedItem($item);
}
$items = $feedItems;
}
$infos = [
'name' => $bridge->getName(),
'uri' => $bridge->getURI(),
'donationUri' => $bridge->getDonationURI(),
'icon' => $bridge->getIcon()
];
} catch (\Exception $e) {
$errorOutput = Configuration::getConfig('error', 'output');
$reportLimit = Configuration::getConfig('error', 'report_limit');
if ($e instanceof HttpException) {
// Reproduce (and log) these responses regardless of error output and report limit
if ($e->getCode() === 429) {
Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e)));
return new Response('429 Too Many Requests', 429);
}
if ($e->getCode() === 503) {
Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e)));
return new Response('503 Service Unavailable', 503);
}
// Might want to cache other codes such as 504 Gateway Timeout
}
if (in_array($errorOutput, ['feed', 'none'])) {
Logger::error(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e)), ['e' => $e]);
}
$errorCount = 1;
if ($reportLimit > 1) {
$errorCount = $this->logBridgeError($bridge->getName(), $e->getCode());
}
// Let clients know about the error if we are passed the report limit
if ($errorCount >= $reportLimit) {
if ($errorOutput === 'feed') {
// Render the exception as a feed item
$items[] = $this->createFeedItemFromException($e, $bridge);
} elseif ($errorOutput === 'http') {
// Rethrow so that the main exception handler in RssBridge.php produces an HTTP 500
throw $e;
} elseif ($errorOutput === 'none') {
// Do nothing (produces an empty feed)
} else {
// Do nothing, unknown error output? Maybe throw exception or validate in Configuration.php
}
// Let clients know about the error if we are passed the report limit
if ($errorCount >= $reportLimit) {
if ($errorOutput === 'feed') {
// Render the exception as a feed item
$items[] = $this->createFeedItemFromException($e, $bridge);
} elseif ($errorOutput === 'http') {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['e' => $e]), 500);
} elseif ($errorOutput === 'none') {
// Do nothing (produces an empty feed)
}
}
// Unfortunately need to set scope and key again because they might be modified
$this->cache->setScope('');
$this->cache->setKey($cache_params);
$this->cache->saveData([
'items' => array_map(function (FeedItem $item) {
return $item->toArray();
}, $items),
'extraInfos' => $infos
]);
$this->cache->purgeCache();
}
$format->setItems($items);
$format->setExtraInfos($infos);
$newModificationTime = $this->cache->getTime();
$format->setLastModified($newModificationTime);
$headers = [];
if ($newModificationTime) {
$headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $newModificationTime) . 'GMT';
}
$headers['Content-Type'] = $format->getMimeType() . '; charset=' . $format->getCharset();
$now = time();
$format->setLastModified($now);
$headers = [
'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT',
'content-type' => $format->getMimeType() . '; charset=' . $format->getCharset(),
];
return new Response($format->stringify(), 200, $headers);
}
@ -234,9 +178,8 @@ class DisplayAction implements ActionInterface
private function logBridgeError($bridgeName, $code)
{
$this->cache->setScope('error_reporting');
$this->cache->setkey([$bridgeName . '_' . $code]);
$report = $this->cache->loadData();
$cacheKey = 'error_reporting_' . $bridgeName . '_' . $code;
$report = $this->cache->get($cacheKey);
if ($report) {
$report = Json::decode($report);
$report['time'] = time();
@ -248,7 +191,8 @@ class DisplayAction implements ActionInterface
'count' => 1,
];
}
$this->cache->saveData(Json::encode($report));
$ttl = 86400 * 5;
$this->cache->set($cacheKey, Json::encode($report), $ttl);
return $report['count'];
}

View file

@ -37,6 +37,6 @@ class ListAction implements ActionInterface
];
}
$list->total = count($list->bridges);
return new Response(Json::encode($list), 200, ['Content-Type' => 'application/json']);
return new Response(Json::encode($list), 200, ['content-type' => 'application/json']);
}
}

View file

@ -19,7 +19,10 @@ class SetBridgeCacheAction implements ActionInterface
$authenticationMiddleware = new ApiAuthenticationMiddleware();
$authenticationMiddleware($request);
$key = $request['key'] or returnClientError('You must specify key!');
$key = $request['key'] ?? null;
if (!$key) {
returnClientError('You must specify key!');
}
$bridgeFactory = new BridgeFactory();
@ -40,13 +43,10 @@ class SetBridgeCacheAction implements ActionInterface
$value = $request['value'];
$cache = RssBridge::getCache();
$cache->setScope(get_class($bridge));
if (!is_array($key)) {
// not sure if $key is an array when it comes in from request
$key = [$key];
}
$cache->setKey($key);
$cache->saveData($value);
$cacheKey = get_class($bridge) . '_' . $key;
$ttl = 86400 * 3;
$cache->set($cacheKey, $value, $ttl);
header('Content-Type: text/plain');
echo 'done';

View file

@ -33,6 +33,7 @@ class AO3Bridge extends BridgeAbstract
],
]
];
private $title;
public function collectData()
{
@ -94,11 +95,12 @@ class AO3Bridge extends BridgeAbstract
$url = self::URI . "/works/$id/navigate";
$httpClient = RssBridge::getHttpClient();
$version = 'v0.0.1';
$response = $httpClient->request($url, [
'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)',
'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)",
]);
$html = \str_get_html($response['body']);
$html = \str_get_html($response->getBody());
$html = defaultLinkTo($html, self::URI);
$this->title = $html->find('h2 a', 0)->plaintext;

View file

@ -159,7 +159,7 @@ class BugzillaBridge extends BridgeAbstract
protected function getUser($user)
{
// Check if the user endpoint is available
if ($this->loadCacheValue($this->instance . 'userEndpointClosed', 86400)) {
if ($this->loadCacheValue($this->instance . 'userEndpointClosed')) {
return $user;
}

View file

@ -114,18 +114,17 @@ class ElloBridge extends BridgeAbstract
private function getAPIKey()
{
$cache = RssBridge::getCache();
$cache->setScope('ElloBridge');
$cache->setKey(['key']);
$key = $cache->loadData();
$cacheKey = 'ElloBridge_key';
$apiKey = $cache->get($cacheKey);
if ($key == null) {
$keyInfo = getContents(self::URI . 'api/webapp-token') or
returnServerError('Unable to get token.');
$key = json_decode($keyInfo)->token->access_token;
$cache->saveData($key);
if (!$apiKey) {
$keyInfo = getContents(self::URI . 'api/webapp-token') or returnServerError('Unable to get token.');
$apiKey = json_decode($keyInfo)->token->access_token;
$ttl = 60 * 60 * 20;
$cache->set($cacheKey, $apiKey, $ttl);
}
return $key;
return $apiKey;
}
public function getName()

View file

@ -99,23 +99,22 @@ class InstagramBridge extends BridgeAbstract
}
$cache = RssBridge::getCache();
$cache->setScope('InstagramBridge');
$cache->setKey([$username]);
$key = $cache->loadData();
$cacheKey = 'InstagramBridge_' . $username;
$pk = $cache->get($cacheKey);
if ($key == null) {
if (!$pk) {
$data = $this->getContents(self::URI . 'web/search/topsearch/?query=' . $username);
foreach (json_decode($data)->users as $user) {
if (strtolower($user->user->username) === strtolower($username)) {
$key = $user->user->pk;
$pk = $user->user->pk;
}
}
if ($key == null) {
if (!$pk) {
returnServerError('Unable to find username in search result.');
}
$cache->saveData($key);
$cache->set($cacheKey, $pk);
}
return $key;
return $pk;
}
public function collectData()

View file

@ -100,7 +100,7 @@ class MastodonBridge extends BridgeAbstract
// We fetch the boosted content.
try {
$rtContent = $this->fetchAP($content['object']);
$rtUser = $this->loadCacheValue($rtContent['attributedTo'], 86400);
$rtUser = $this->loadCacheValue($rtContent['attributedTo']);
if (!isset($rtUser)) {
// We fetch the author, since we cannot always assume the format of the URL.
$user = $this->fetchAP($rtContent['attributedTo']);

View file

@ -72,8 +72,30 @@ class RedditBridge extends BridgeAbstract
]
]
];
private CacheInterface $cache;
/**
 * Store the cache instance returned by RssBridge::getCache() on the bridge
 * so that collectData() can read and write the rate-limit marker.
 */
public function __construct()
{
$this->cache = RssBridge::getCache();
}
/**
 * Collect feed data, refusing to contact the remote site while a recent
 * rate-limit response is still remembered in the cache.
 *
 * When the cache holds a truthy value under 'reddit_rate_limit', a
 * 429 HttpException is thrown immediately without doing any work.
 * Otherwise the real work is delegated to collectDataInternal(). If that
 * raises a 429 HttpException the marker is stored in the cache before the
 * exception is propagated.
 *
 * @throws HttpException with code 429 while the marker is cached, and any
 *                       HttpException raised by collectDataInternal()
 */
public function collectData()
{
    $cacheKey = 'reddit_rate_limit';

    if ($this->cache->get($cacheKey)) {
        throw new HttpException('429 Too Many Requests', 429);
    }

    try {
        $this->collectDataInternal();
    } catch (HttpException $e) {
        if ($e->getCode() === 429) {
            // Remember the rate limit; TTL is 60 * 16 (presumably seconds, i.e. 16 minutes).
            $this->cache->set($cacheKey, true, 60 * 16);
        }
        // Always propagate: previously only 429 was rethrown, so every other
        // HttpException (403, 404, 500, ...) was swallowed and the caller
        // saw a successful but empty feed.
        throw $e;
    }
}
private function collectDataInternal(): void
{
$user = false;
$comments = false;

View file

@ -36,7 +36,7 @@ class SoundCloudBridge extends BridgeAbstract
private $feedTitle = null;
private $feedIcon = null;
private $cache = null;
private CacheInterface $cache;
private $clientIdRegex = '/client_id.*?"(.+?)"/';
private $widgetRegex = '/widget-.+?\.js/';
@ -44,8 +44,6 @@ class SoundCloudBridge extends BridgeAbstract
public function collectData()
{
$this->cache = RssBridge::getCache();
$this->cache->setScope('SoundCloudBridge');
$this->cache->setKey(['client_id']);
$res = $this->getUser($this->getInput('u'));
@ -121,11 +119,9 @@ HTML;
private function getClientID()
{
$this->cache->setScope('SoundCloudBridge');
$this->cache->setKey(['client_id']);
$clientID = $this->cache->loadData();
$clientID = $this->cache->get('SoundCloudBridge_client_id');
if ($clientID == null) {
if (!$clientID) {
return $this->refreshClientID();
} else {
return $clientID;
@ -151,10 +147,7 @@ HTML;
if (preg_match($this->clientIdRegex, $widgetJS, $matches)) {
$clientID = $matches[1];
$this->cache->setScope('SoundCloudBridge');
$this->cache->setKey(['client_id']);
$this->cache->saveData($clientID);
$this->cache->set('SoundCloudBridge_client_id', $clientID);
return $clientID;
}
}

View file

@ -279,10 +279,9 @@ class SpotifyBridge extends BridgeAbstract
private function fetchAccessToken()
{
$cache = RssBridge::getCache();
$cacheKey = sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret'));
$cache->setScope('SpotifyBridge');
$cache->setKey([$cacheKey]);
$token = $cache->loadData(3600);
$cacheKey = sprintf('SpotifyBridge:%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret'));
$token = $cache->get($cacheKey);
if ($token) {
$this->token = $token;
} else {
@ -294,9 +293,8 @@ class SpotifyBridge extends BridgeAbstract
]);
$data = Json::decode($json);
$this->token = $data['access_token'];
$cache->setScope('SpotifyBridge');
$cache->setKey([$cacheKey]);
$cache->saveData($this->token);
$cache->set($cacheKey, $this->token, 3600);
}
}

View file

@ -594,156 +594,4 @@ EOD;
{
return (intval($tweet1['id']) < intval($tweet2['id']) ? 1 : -1);
}
//The aim of this function is to get an API key and a guest token
//This function takes 2 requests, and therefore is cached
private function getApiKey($forceNew = 0)
{
$r_cache = RssBridge::getCache();
$scope = 'TwitterBridge';
$r_cache->setScope($scope);
$r_cache->setKey(['refresh']);
$data = $r_cache->loadData();
$refresh = null;
if ($data === null) {
$refresh = time();
$r_cache->saveData($refresh);
} else {
$refresh = $data;
}
$cacheFactory = new CacheFactory();
$cache = RssBridge::getCache();
$cache->setScope($scope);
$cache->setKey(['api_key']);
$data = $cache->loadData();
$apiKey = null;
if ($forceNew || $data === null || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) {
$twitterPage = getContents('https://twitter.com');
$jsLink = false;
$jsMainRegexArray = [
'/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m',
'/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m',
'/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m',
'/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m',
];
foreach ($jsMainRegexArray as $jsMainRegex) {
if (preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0)) {
$jsLink = $jsMainMatches[0][0];
break;
}
}
if (!$jsLink) {
returnServerError('Could not locate main.js link');
}
$jsContent = getContents($jsLink);
$apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m';
preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0);
$apiKey = $apiKeyMatches[0][0];
$cache->saveData($apiKey);
} else {
$apiKey = $data;
}
$gt_cache = RssBridge::getCache();
$gt_cache->setScope($scope);
$gt_cache->setKey(['guest_token']);
$guestTokenUses = $gt_cache->loadData();
$guestToken = null;
if (
$forceNew || $guestTokenUses === null || !is_array($guestTokenUses) || count($guestTokenUses) != 2
|| $guestTokenUses[0] <= 0 || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY
) {
$guestToken = $this->getGuestToken($apiKey);
if ($guestToken === null) {
if ($guestTokenUses === null) {
returnServerError('Could not parse guest token');
} else {
$guestToken = $guestTokenUses[1];
}
} else {
$gt_cache->saveData([self::GUEST_TOKEN_USES, $guestToken]);
$r_cache->saveData(time());
}
} else {
$guestTokenUses[0] -= 1;
$gt_cache->saveData($guestTokenUses);
$guestToken = $guestTokenUses[1];
}
$this->apiKey = $apiKey;
$this->guestToken = $guestToken;
$this->authHeaders = [
'authorization: Bearer ' . $apiKey,
'x-guest-token: ' . $guestToken,
];
return [$apiKey, $guestToken];
}
// Get a guest token. This is different to an API key,
// and it seems to change more regularly than the API key.
private function getGuestToken($apiKey)
{
$headers = [
'authorization: Bearer ' . $apiKey,
];
$opts = [
CURLOPT_POST => 1,
];
try {
$pageContent = getContents('https://api.twitter.com/1.1/guest/activate.json', $headers, $opts, true);
$guestToken = json_decode($pageContent['content'])->guest_token;
} catch (Exception $e) {
$guestToken = null;
}
return $guestToken;
}
/**
* Tries to make an API call to twitter.
* @param $api string API entry point
* @param $params array additional URI parmaeters
* @return object json data
*/
private function makeApiCall($api, $params)
{
$uri = self::API_URI . $api . '?' . http_build_query($params);
$retries = 1;
$retry = 0;
do {
$retry = 0;
try {
$result = getContents($uri, $this->authHeaders, [], true);
} catch (HttpException $e) {
switch ($e->getCode()) {
case 401:
// fall-through
case 403:
if ($retries) {
$retries--;
$retry = 1;
$this->getApiKey(1);
continue 2;
}
// fall-through
default:
throw $e;
}
}
} while ($retry);
$data = json_decode($result['content']);
return $data;
}
}

View file

@ -117,7 +117,7 @@ The default URI shows the Madara demo page.';
protected function getMangaInfo($url)
{
$url_cache = 'TitleInfo_' . preg_replace('/[^\w]/', '.', rtrim($url, '/'));
$cache = $this->loadCacheValue($url_cache, 86400);
$cache = $this->loadCacheValue($url_cache);
if (isset($cache)) {
return $cache;
}

View file

@ -77,6 +77,138 @@ class YoutubeBridge extends BridgeAbstract
private $channel_name = '';
// This took from repo BetterVideoRss of VerifiedJoseph.
const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore
private CacheInterface $cache;
/**
 * Store the cache instance returned by RssBridge::getCache() on the bridge
 * so that collectData() can read and write the rate-limit marker.
 */
public function __construct()
{
$this->cache = RssBridge::getCache();
}
/**
 * Populate the bridge's items for whichever input mode was supplied.
 *
 * Inputs are checked in this order:
 *  - 'u' (user), 'c' (channel id) or 'custom' (custom channel path):
 *    parse the XML feed unless skipFeeds() is true, in which case the
 *    JSON embedded in the channel's /videos listing page is parsed.
 *  - 'p' (playlist): parse the listing JSON, or the XML feed when the
 *    playlist has at most 15 entries and feeds are not skipped; the
 *    resulting items are then sorted newest first.
 *  - 's' (search): parse the first result section that contains videos.
 *
 * If none of these inputs is set, returnClientError() is called.
 */
private function collectDataInternal()
{
    $html = '';
    $url_feed = '';
    $url_listing = '';

    if ($this->getInput('u')) {
        /* User and Channel modes */
        $this->request = $this->getInput('u');
        $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request);
        $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos';
    } elseif ($this->getInput('c')) {
        $this->request = $this->getInput('c');
        $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request);
        $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos';
    } elseif ($this->getInput('custom')) {
        $this->request = $this->getInput('custom');
        $url_listing = self::URI . urlencode($this->request) . '/videos';
    }

    if (!empty($url_feed) || !empty($url_listing)) {
        $this->feeduri = $url_listing;
        if (!empty($this->getInput('custom'))) {
            // Custom mode has no feed URL yet: read it (and the icon) from the listing page's JSON.
            $html = $this->ytGetSimpleHTMLDOM($url_listing);
            $jsonData = $this->getJSONData($html);
            $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl;
            $this->iconURL = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url;
        }
        if (!$this->skipFeeds()) {
            $html = $this->ytGetSimpleHTMLDOM($url_feed);
            $this->ytBridgeParseXmlFeed($html);
        } else {
            if (empty($this->getInput('custom'))) {
                // In custom mode the listing page was already fetched above.
                $html = $this->ytGetSimpleHTMLDOM($url_listing);
                $jsonData = $this->getJSONData($html);
            }
            if (isset($jsonData->contents)) {
                $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1];
                $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents;
                // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items;
                $this->parseJSONListing($jsonData);
            } else {
                returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request);
            }
        }
        $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
    } elseif ($this->getInput('p')) {
        /* playlist mode */
        // TODO: this mode makes a lot of excess video query requests.
        // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration"
        // This cache will be used to find out, which videos to fetch
        // to make feed of 15 items or more, if there a lot of videos published on that date.
        $this->request = $this->getInput('p');
        $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request);
        $url_listing = self::URI . 'playlist?list=' . urlencode($this->request);
        $html = $this->ytGetSimpleHTMLDOM($url_listing);
        $jsonData = $this->getJSONData($html);
        // TODO: this method returns only first 100 video items
        // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
        $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0];
        $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer;
        $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents;
        $item_count = count($jsonData);

        if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) {
            $this->ytBridgeParseXmlFeed($xml);
        } else {
            $this->parseJSONListing($jsonData);
        }
        // feedName will be used by getName()
        $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);

        // Normalise each timestamp on its own. The previous comparator only
        // converted when BOTH values were non-int, so an int/string pair was
        // subtracted without conversion and sorted incorrectly.
        $toTimestamp = static function ($value) {
            return is_int($value) ? $value : strtotime($value);
        };
        usort($this->items, function ($item1, $item2) use ($toTimestamp) {
            // Descending order: newest item first.
            return $toTimestamp($item2['timestamp']) - $toTimestamp($item1['timestamp']);
        });
    } elseif ($this->getInput('s')) {
        /* search mode */
        $this->request = $this->getInput('s');
        $url_listing = self::URI
            . 'results?search_query='
            . urlencode($this->request)
            . '&sp=CAI%253D';
        $html = $this->ytGetSimpleHTMLDOM($url_listing);
        $jsonData = $this->getJSONData($html);
        $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents;
        $jsonData = $jsonData->sectionListRenderer->contents;
        foreach ($jsonData as $data) {
            // Search result includes some ads, have to filter them
            if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) {
                $jsonData = $data->itemSectionRenderer->contents;
                break;
            }
        }
        $this->parseJSONListing($jsonData);
        $this->feeduri = $url_listing;
        $this->feedName = 'Search: ' . $this->request;
    } else {
        /* no valid mode */
        returnClientError("You must either specify either:\n - YouTube
username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
    }
}
/**
 * Collect feed data, refusing to contact the remote site while a recent
 * rate-limit response is still remembered in the cache.
 *
 * When the cache holds a truthy value under 'youtube_rate_limit', a
 * 429 HttpException is thrown immediately without doing any work.
 * Otherwise the real work is delegated to collectDataInternal(). If that
 * raises a 429 HttpException the marker is stored in the cache before the
 * exception is propagated.
 *
 * @throws HttpException with code 429 while the marker is cached, and any
 *                       HttpException raised by collectDataInternal()
 */
public function collectData()
{
    $cacheKey = 'youtube_rate_limit';

    if ($this->cache->get($cacheKey)) {
        throw new HttpException('429 Too Many Requests', 429);
    }

    try {
        $this->collectDataInternal();
    } catch (HttpException $e) {
        if ($e->getCode() === 429) {
            // Remember the rate limit; TTL is 60 * 16 (presumably seconds, i.e. 16 minutes).
            $this->cache->set($cacheKey, true, 60 * 16);
        }
        // Always propagate: previously only 429 was rethrown, so every other
        // HttpException (403, 404, 500, ...) was swallowed and the caller
        // saw a successful but empty feed.
        throw $e;
    }
}
private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time)
{
@ -153,7 +285,8 @@ class YoutubeBridge extends BridgeAbstract
$item['timestamp'] = $time;
$item['uri'] = self::URI . 'watch?v=' . $vid;
if (!$thumbnail) {
$thumbnail = '0'; // Fallback to default thumbnail if there aren't any provided.
// Fallback to default thumbnail if there aren't any provided.
$thumbnail = '0';
}
$thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail . '.jpg';
$item['content'] = '<a href="' . $item['uri'] . '"><img src="' . $thumbnailUri . '" /></a><br />' . $desc;
@ -315,111 +448,6 @@ class YoutubeBridge extends BridgeAbstract
}
}
/**
 * Collects feed items for whichever mode the request inputs select.
 *
 * The first non-empty input among `u` (username), `c` (channel id),
 * `custom` (custom name), `p` (playlist id) and `s` (search) decides the mode.
 * Items are produced by ytBridgeParseXmlFeed() or parseJSONListing(); this
 * method also sets $this->request, $this->feedName and, for all modes except
 * playlist, $this->feeduri.
 *
 * With none of these inputs set, returnClientError() is called.
 *
 * @return void
 */
public function collectData()
{
$xml = '';
$html = '';
$url_feed = '';
$url_listing = '';
if ($this->getInput('u')) { /* User and Channel modes */
$this->request = $this->getInput('u');
$url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request);
$url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos';
} elseif ($this->getInput('c')) {
$this->request = $this->getInput('c');
$url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request);
$url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos';
} elseif ($this->getInput('custom')) {
// Custom-name mode: only the listing URL is known up front; the feed URL is read from the page below.
$this->request = $this->getInput('custom');
$url_listing = self::URI . urlencode($this->request) . '/videos';
}
// Shared handling for the username, channel id and custom-name modes.
if (!empty($url_feed) || !empty($url_listing)) {
$this->feeduri = $url_listing;
if (!empty($this->getInput('custom'))) {
// Read the RSS URL and the channel avatar from the JSON embedded in the listing page.
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
$url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl;
$this->iconURL = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url;
}
// skipFeeds() is true when a duration_min/duration_max input is set; only then is the
// listing page parsed instead of the XML feed.
if (!$this->skipFeeds()) {
$html = $this->ytGetSimpleHTMLDOM($url_feed);
$this->ytBridgeParseXmlFeed($html);
} else {
// For custom names the listing page was already fetched and decoded above.
if (empty($this->getInput('custom'))) {
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
}
// NOTE(review): $channel_id is assigned below but never read again within this method.
$channel_id = '';
if (isset($jsonData->contents)) {
$channel_id = $jsonData->metadata->channelMetadataRenderer->externalId;
$jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1];
$jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents;
// $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items;
$this->parseJSONListing($jsonData);
} else {
returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request);
}
}
// Feed name: <title> of the document last loaded into $html, with " - YouTube" removed.
$this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
} elseif ($this->getInput('p')) { /* playlist mode */
// TODO: this mode makes a lot of excess video query requests.
// To make less requests, we need to cache following dictionary "videoId -> datePublished, duration"
// This cache will be used to find out, which videos to fetch
// to make feed of 15 items or more, if there a lot of videos published on that date.
$this->request = $this->getInput('p');
$url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request);
$url_listing = self::URI . 'playlist?list=' . urlencode($this->request);
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
// TODO: this method returns only first 100 video items
// if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
$jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0];
$jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer;
$jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents;
$item_count = count($jsonData);
// Use the XML feed only when the listing has at most 15 entries, no duration filter is set
// and the feed document could be loaded; otherwise parse the listing JSON.
if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) {
$this->ytBridgeParseXmlFeed($xml);
} else {
$this->parseJSONListing($jsonData);
}
$this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName()
// Sort items by timestamp, newest first.
// NOTE(review): strtotime() is applied only when NEITHER timestamp is an int; a mixed
// int/string pair is subtracted as-is — confirm that combination cannot occur.
usort($this->items, function ($item1, $item2) {
if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) {
$item1['timestamp'] = strtotime($item1['timestamp']);
$item2['timestamp'] = strtotime($item2['timestamp']);
}
return $item2['timestamp'] - $item1['timestamp'];
});
} elseif ($this->getInput('s')) { /* search mode */
$this->request = $this->getInput('s');
// NOTE(review): the "sp" query parameter presumably selects a result ordering/filter — confirm its meaning.
$url_listing = self::URI
. 'results?search_query='
. urlencode($this->request)
. '&sp=CAI%253D';
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
$jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents;
$jsonData = $jsonData->sectionListRenderer->contents;
foreach ($jsonData as $data) { // Search result includes some ads, have to filter them
if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) {
$jsonData = $data->itemSectionRenderer->contents;
break;
}
}
// NOTE(review): if no section starts with a videoRenderer, $jsonData is still the whole section list here.
$this->parseJSONListing($jsonData);
$this->feeduri = $url_listing;
$this->feedName = 'Search: ' . $this->request; // feedName will be used by getName()
} else { /* no valid mode */
returnClientError("You must either specify either:\n - YouTube
username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
}
}
private function skipFeeds()
{
return ($this->getInput('duration_min') || $this->getInput('duration_max'));
@ -438,14 +466,13 @@ class YoutubeBridge extends BridgeAbstract
public function getName()
{
// Name depends on queriedContext:
switch ($this->queriedContext) {
case 'By username':
case 'By channel id':
case 'By custom name':
case 'By playlist Id':
case 'Search result':
return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right?
return htmlspecialchars_decode($this->feedName) . ' - YouTube';
default:
return parent::getName();
}

52
caches/ArrayCache.php Normal file
View file

@ -0,0 +1,52 @@
<?php
declare(strict_types=1);
/**
 * Cache backend that keeps all entries in a PHP array held by this object.
 *
 * Each entry records an absolute expiration timestamp; the value 0 means the
 * entry never expires.
 */
class ArrayCache implements CacheInterface
{
    /**
     * Stored entries, indexed by cache key.
     *
     * @var array<string, array{key: string, value: mixed, expiration: int}>
     */
    private array $data = [];

    /**
     * Returns the value stored under $key.
     *
     * An expired entry is removed on access.
     *
     * @param mixed $default Returned when the key is missing or has expired
     * @return mixed
     */
    public function get(string $key, $default = null)
    {
        $item = $this->data[$key] ?? null;
        if ($item === null) {
            return $default;
        }
        if (self::isFresh($item, time())) {
            return $item['value'];
        }
        $this->delete($key);
        return $default;
    }

    /**
     * Stores $value under $key, replacing any existing entry.
     *
     * @param mixed $value
     * @param int|null $ttl Lifetime in seconds; null stores the entry without expiry
     */
    public function set(string $key, $value, ?int $ttl = null): void
    {
        $this->data[$key] = [
            'key'           => $key,
            'value'         => $value,
            'expiration'    => $ttl === null ? 0 : time() + $ttl,
        ];
    }

    /**
     * Removes the entry stored under $key; a missing key is not an error.
     */
    public function delete(string $key): void
    {
        unset($this->data[$key]);
    }

    /**
     * Removes every entry.
     */
    public function clear(): void
    {
        $this->data = [];
    }

    /**
     * Removes all expired entries, keeping those without expiry.
     */
    public function prune(): void
    {
        // Read the clock once so every entry is judged against the same instant
        $now = time();
        $this->data = array_filter(
            $this->data,
            static fn (array $item): bool => self::isFresh($item, $now)
        );
    }

    /**
     * Tells whether an entry is still valid at the instant $now.
     */
    private static function isFresh(array $item, int $now): bool
    {
        return $item['expiration'] === 0 || $item['expiration'] > $now;
    }
}

View file

@ -1,13 +1,10 @@
<?php
/**
* @link https://www.php.net/manual/en/function.clearstatcache.php
*/
declare(strict_types=1);
class FileCache implements CacheInterface
{
private array $config;
protected string $scope;
protected string $key;
public function __construct(array $config = [])
{
@ -23,125 +20,89 @@ class FileCache implements CacheInterface
$this->config['path'] = rtrim($this->config['path'], '/') . '/';
}
public function getConfig()
public function get(string $key, $default = null)
{
return $this->config;
$cacheFile = $this->createCacheFile($key);
if (!file_exists($cacheFile)) {
return $default;
}
$item = unserialize(file_get_contents($cacheFile));
if ($item === false) {
Logger::warning(sprintf('Failed to unserialize: %s', $cacheFile));
$this->delete($key);
return $default;
}
$expiration = $item['expiration'];
if ($expiration === 0 || $expiration > time()) {
return $item['value'];
}
$this->delete($key);
return $default;
}
public function loadData(int $timeout = 86400)
public function set($key, $value, int $ttl = null): void
{
clearstatcache();
if (!file_exists($this->getCacheFile())) {
return null;
}
$modificationTime = filemtime($this->getCacheFile());
if (time() - $timeout < $modificationTime) {
$data = unserialize(file_get_contents($this->getCacheFile()));
if ($data === false) {
Logger::warning(sprintf('Failed to unserialize: %s', $this->getCacheFile()));
// Intentionally not throwing an exception
return null;
}
return $data;
}
// It's a good idea to delete the expired item here, but commented out atm
// unlink($this->getCacheFile());
return null;
}
public function saveData($data): void
{
$bytes = file_put_contents($this->getCacheFile(), serialize($data), LOCK_EX);
$item = [
'key' => $key,
'value' => $value,
'expiration' => $ttl === null ? 0 : time() + $ttl,
];
$cacheFile = $this->createCacheFile($key);
$bytes = file_put_contents($cacheFile, serialize($item), LOCK_EX);
if ($bytes === false) {
throw new \Exception(sprintf('Failed to write to: %s', $this->getCacheFile()));
// Consider just logging the error here
throw new \Exception(sprintf('Failed to write to: %s', $cacheFile));
}
}
public function getTime(): ?int
public function delete(string $key): void
{
clearstatcache();
$cacheFile = $this->getCacheFile();
if (file_exists($cacheFile)) {
$time = filemtime($cacheFile);
if ($time !== false) {
return $time;
}
return null;
}
return null;
unlink($this->createCacheFile($key));
}
public function purgeCache(int $timeout = 86400): void
/**
 * Deletes every regular file in the configured cache directory.
 *
 * The entries `.`, `..` and `.gitkeep` are always left in place, as is
 * anything that is not a regular file.
 */
public function clear(): void
{
    // Built once; the set of protected names does not change per file
    $excluded = ['.' => true, '..' => true, '.gitkeep' => true];

    $filenames = scandir($this->config['path']);
    if ($filenames === false) {
        // scandir() has already raised a warning; there is nothing to iterate
        return;
    }

    foreach ($filenames as $filename) {
        $cacheFile = $this->config['path'] . $filename;
        if (isset($excluded[$filename]) || !is_file($cacheFile)) {
            continue;
        }
        unlink($cacheFile);
    }
}
public function prune(): void
{
if (! $this->config['enable_purge']) {
return;
}
$cachePath = $this->getScope();
if (!file_exists($cachePath)) {
return;
}
$cacheIterator = new \RecursiveIteratorIterator(
new \RecursiveDirectoryIterator($cachePath),
\RecursiveIteratorIterator::CHILD_FIRST
);
foreach ($cacheIterator as $cacheFile) {
$basename = $cacheFile->getBasename();
$excluded = [
'.' => true,
'..' => true,
'.gitkeep' => true,
];
if (isset($excluded[$basename])) {
foreach (scandir($this->config['path']) as $filename) {
$cacheFile = $this->config['path'] . $filename;
$excluded = ['.' => true, '..' => true, '.gitkeep' => true];
if (isset($excluded[$filename]) || !is_file($cacheFile)) {
continue;
} elseif ($cacheFile->isFile()) {
$filepath = $cacheFile->getPathname();
if (filemtime($filepath) < time() - $timeout) {
// todo: sometimes this file doesn't exists
unlink($filepath);
}
}
}
}
public function setScope(string $scope): void
{
$this->scope = $this->config['path'] . trim($scope, " \t\n\r\0\x0B\\\/") . '/';
}
public function setKey(array $key): void
{
$this->key = json_encode($key);
}
private function getScope()
{
if (is_null($this->scope)) {
throw new \Exception('Call "setScope" first!');
}
if (!is_dir($this->scope)) {
if (mkdir($this->scope, 0755, true) !== true) {
throw new \Exception('mkdir: Unable to create file cache folder');
$item = unserialize(file_get_contents($cacheFile));
if ($item === false) {
unlink($cacheFile);
continue;
}
$expiration = $item['expiration'];
if ($expiration === 0 || $expiration > time()) {
continue;
}
unlink($cacheFile);
}
return $this->scope;
}
private function getCacheFile()
private function createCacheFile(string $key): string
{
return $this->getScope() . $this->getCacheName();
return $this->config['path'] . hash('md5', $key) . '.cache';
}
private function getCacheName()
public function getConfig()
{
if (is_null($this->key)) {
throw new \Exception('Call "setKey" first!');
}
return hash('md5', $this->key) . '.cache';
return $this->config;
}
}

View file

@ -1,70 +1,36 @@
<?php
declare(strict_types=1);
class MemcachedCache implements CacheInterface
{
private string $scope;
private string $key;
private $conn;
private $expiration = 0;
private \Memcached $conn;
public function __construct()
public function __construct(string $host, int $port)
{
if (!extension_loaded('memcached')) {
throw new \Exception('"memcached" extension not loaded. Please check "php.ini"');
$this->conn = new \Memcached();
// This call does not actually connect to server yet
if (!$this->conn->addServer($host, $port)) {
throw new \Exception('Unable to add memcached server');
}
$section = 'MemcachedCache';
$host = Configuration::getConfig($section, 'host');
$port = Configuration::getConfig($section, 'port');
if (empty($host) && empty($port)) {
throw new \Exception('Configuration for ' . $section . ' missing.');
}
if (empty($host)) {
throw new \Exception('"host" param is not set for ' . $section);
}
if (empty($port)) {
throw new \Exception('"port" param is not set for ' . $section);
}
if (!ctype_digit($port)) {
throw new \Exception('"port" param is invalid for ' . $section);
}
$port = intval($port);
if ($port < 1 || $port > 65535) {
throw new \Exception('"port" param is invalid for ' . $section);
}
$conn = new \Memcached();
$conn->addServer($host, $port) or returnServerError('Could not connect to memcached server');
$this->conn = $conn;
}
public function loadData(int $timeout = 86400)
public function get(string $key, $default = null)
{
$value = $this->conn->get($this->getCacheKey());
$value = $this->conn->get($key);
if ($value === false) {
return null;
return $default;
}
if (time() - $timeout < $value['time']) {
return $value['data'];
}
return null;
return $value;
}
public function saveData($data): void
public function set(string $key, $value, $ttl = null): void
{
$value = [
'data' => $data,
'time' => time(),
];
$result = $this->conn->set($this->getCacheKey(), $value, $this->expiration);
$expiration = $ttl === null ? 0 : time() + $ttl;
$result = $this->conn->set($key, $value, $expiration);
if ($result === false) {
Logger::warning('Failed to store an item in memcached', [
'scope' => $this->scope,
'key' => $this->key,
'expiration' => $this->expiration,
'key' => $key,
'code' => $this->conn->getLastErrorCode(),
'message' => $this->conn->getLastErrorMessage(),
'number' => $this->conn->getLastErrorErrno(),
@ -73,38 +39,18 @@ class MemcachedCache implements CacheInterface
}
}
public function getTime(): ?int
public function delete(string $key): void
{
$value = $this->conn->get($this->getCacheKey());
if ($value === false) {
return null;
}
return $value['time'];
$this->conn->delete($key);
}
public function purgeCache(int $timeout = 86400): void
public function clear(): void
{
// Note: does not purges cache right now
// Just sets cache expiration and leave cache purging for memcached itself
$this->expiration = $timeout;
$this->conn->flush();
}
public function setScope(string $scope): void
public function prune(): void
{
$this->scope = $scope;
}
public function setKey(array $key): void
{
$this->key = json_encode($key);
}
private function getCacheKey()
{
if (is_null($this->key)) {
throw new \Exception('Call "setKey" first!');
}
return 'rss_bridge_cache_' . hash('md5', $this->scope . $this->key . 'A');
// memcached manages pruning on its own
}
}

View file

@ -4,28 +4,24 @@ declare(strict_types=1);
class NullCache implements CacheInterface
{
public function setScope(string $scope): void
public function get(string $key, $default = null)
{
return $default;
}
public function set(string $key, $value, int $ttl = null): void
{
}
public function setKey(array $key): void
public function delete(string $key): void
{
}
public function loadData(int $timeout = 86400)
public function clear(): void
{
}
public function saveData($data): void
{
}
public function getTime(): ?int
{
return null;
}
public function purgeCache(int $timeout = 86400): void
public function prune(): void
{
}
}

View file

@ -1,10 +1,10 @@
<?php
declare(strict_types=1);
class SQLiteCache implements CacheInterface
{
private \SQLite3 $db;
private string $scope;
private string $key;
private array $config;
public function __construct(array $config)
@ -31,85 +31,77 @@ class SQLiteCache implements CacheInterface
$this->db->exec("CREATE TABLE storage ('key' BLOB PRIMARY KEY, 'value' BLOB, 'updated' INTEGER)");
}
$this->db->busyTimeout($config['timeout']);
// https://www.sqlite.org/pragma.html#pragma_journal_mode
$this->db->exec('PRAGMA journal_mode = wal');
// https://www.sqlite.org/pragma.html#pragma_synchronous
$this->db->exec('PRAGMA synchronous = NORMAL');
}
public function loadData(int $timeout = 86400)
public function get(string $key, $default = null)
{
$cacheKey = $this->createCacheKey($key);
$stmt = $this->db->prepare('SELECT value, updated FROM storage WHERE key = :key');
$stmt->bindValue(':key', $this->getCacheKey());
$stmt->bindValue(':key', $cacheKey);
$result = $stmt->execute();
if (!$result) {
return null;
return $default;
}
$row = $result->fetchArray(\SQLITE3_ASSOC);
if ($row === false) {
return null;
return $default;
}
$value = $row['value'];
$modificationTime = $row['updated'];
if (time() - $timeout < $modificationTime) {
$data = unserialize($value);
if ($data === false) {
Logger::error(sprintf("Failed to unserialize: '%s'", mb_substr($value, 0, 100)));
return null;
$expiration = $row['updated'];
if ($expiration === 0 || $expiration > time()) {
$blob = $row['value'];
$value = unserialize($blob);
if ($value === false) {
Logger::error(sprintf("Failed to unserialize: '%s'", mb_substr($blob, 0, 100)));
// delete?
return $default;
}
return $data;
return $value;
}
// It's a good idea to delete expired cache items.
// However I'm seeing lots of SQLITE_BUSY errors so commented out for now
// $stmt = $this->db->prepare('DELETE FROM storage WHERE key = :key');
// $stmt->bindValue(':key', $this->getCacheKey());
// $stmt->execute();
return null;
// delete?
return $default;
}
public function saveData($data): void
public function set(string $key, $value, int $ttl = null): void
{
$blob = serialize($data);
$cacheKey = $this->createCacheKey($key);
$blob = serialize($value);
$expiration = $ttl === null ? 0 : time() + $ttl;
$stmt = $this->db->prepare('INSERT OR REPLACE INTO storage (key, value, updated) VALUES (:key, :value, :updated)');
$stmt->bindValue(':key', $this->getCacheKey());
$stmt->bindValue(':key', $cacheKey);
$stmt->bindValue(':value', $blob, \SQLITE3_BLOB);
$stmt->bindValue(':updated', time());
$stmt->execute();
}
public function getTime(): ?int
{
$stmt = $this->db->prepare('SELECT updated FROM storage WHERE key = :key');
$stmt->bindValue(':key', $this->getCacheKey());
$stmt->bindValue(':updated', $expiration);
$result = $stmt->execute();
if ($result) {
$row = $result->fetchArray(\SQLITE3_ASSOC);
if ($row !== false) {
return $row['updated'];
}
}
return null;
// Unclear whether we should $result->finalize(); here?
}
public function purgeCache(int $timeout = 86400): void
/**
 * Deletes the row stored for the given cache key.
 *
 * The key is hashed with createCacheKey() before it is matched against the
 * `key` column.
 */
public function delete(string $key): void
{
    $cacheKey = $this->createCacheKey($key);
    $statement = $this->db->prepare('DELETE FROM storage WHERE key = :key');
    $statement->bindValue(':key', $cacheKey);
    $statement->execute();
}
/**
 * Deletes expired rows from the storage table.
 *
 * Does nothing when the `enable_purge` config value is falsy.
 *
 * The `updated` column holds the absolute expiration timestamp; 0 marks an
 * entry without expiry (get() returns such rows regardless of the clock), so
 * those rows are excluded from the delete.
 */
public function prune(): void
{
    if (!$this->config['enable_purge']) {
        return;
    }
    $stmt = $this->db->prepare('DELETE FROM storage WHERE updated != 0 AND updated <= :now');
    $stmt->bindValue(':now', time());
    $stmt->execute();
}
public function setScope(string $scope): void
public function clear(): void
{
$this->scope = $scope;
$this->db->query('DELETE FROM storage');
}
public function setKey(array $key): void
private function createCacheKey($key)
{
$this->key = json_encode($key);
}
private function getCacheKey()
{
return hash('sha1', $this->scope . $this->key, true);
return hash('sha1', $key, true);
}
}

View file

@ -55,7 +55,7 @@ max_filesize = 20
[cache]
; Cache type: file, sqlite, memcached, null
; Cache type: file, sqlite, memcached, array, null
type = "file"
; Allow users to specify custom timeout for specific requests.

View file

@ -15,14 +15,17 @@ while ($next) { /* Collect all contributors */
'User-Agent' => 'RSS-Bridge',
];
$httpClient = new CurlHttpClient();
$result = $httpClient->request($url, ['headers' => $headers]);
$response = $httpClient->request($url, ['headers' => $headers]);
foreach (json_decode($result['body']) as $contributor) {
$json = $response->getBody();
$json_decode = Json::decode($json, false);
foreach ($json_decode as $contributor) {
$contributors[] = $contributor;
}
// Extract links to "next", "last", etc...
$links = explode(',', $result['headers']['link'][0]);
$link1 = $response->getHeader('link');
$links = explode(',', $link1);
$next = false;
// Check if there is a link with 'rel="next"'

View file

@ -5,10 +5,12 @@ The `getInput` function is used to receive a value for a parameter, specified in
$this->getInput('your input name here');
```
`getInput` will either return the value for your parameter or `null` if the parameter is unknown or not specified.
`getInput` will either return the value for your parameter
or `null` if the parameter is unknown or not specified.
# getKey
The `getKey` function is used to receive the key name to a selected list value given the name of the list, specified in `const PARAMETERS`
The `getKey` function is used to receive the key name of a selected list
value given the name of the list, specified in `const PARAMETERS`.
It is able to work with multidimensional list arrays.
```PHP
@ -34,7 +36,8 @@ $this->getKey('country');
// if the selected value was "ve", this function will return "Venezuela"
```
`getKey` will either return the key name for your parameter or `null` if the parameter is unknown or not specified.
`getKey` will either return the key name for your parameter or `null` if the parameter
is unknown or not specified.
# getContents
The `getContents` function uses [cURL](https://secure.php.net/manual/en/book.curl.php) to acquire data from the specified URI while respecting the various settings defined at a global level by RSS-Bridge (i.e., proxy host, user agent, etc.). This function accepts a few parameters:
@ -53,33 +56,29 @@ $html = getContents($url, $header, $opts);
```
# getSimpleHTMLDOM
The `getSimpleHTMLDOM` function is a wrapper for the [simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design.
The `getSimpleHTMLDOM` function is a wrapper for the
[simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design.
```PHP
$html = getSimpleHTMLDOM('your URI');
```
# getSimpleHTMLDOMCached
The `getSimpleHTMLDOMCached` function does the same as the [`getSimpleHTMLDOM`](#getsimplehtmldom) function, except that the content received for the given URI is stored in a cache and loaded from cache on the next request if the specified cache duration was not reached. Use this function for data that is very unlikely to change between consecutive requests to **RSS-Bridge**. This function allows to specify the cache duration with the second parameter (default is 24 hours / 86400 seconds).
The `getSimpleHTMLDOMCached` function does the same as the
[`getSimpleHTMLDOM`](#getsimplehtmldom) function,
except that the content received for the given URI is stored in a cache
and loaded from cache on the next request if the specified cache duration
was not reached.
Use this function for data that is very unlikely to change between consecutive requests to **RSS-Bridge**.
This function allows you to specify the cache duration, in seconds, with the second parameter.
```PHP
$html = getSimpleHTMLDOMCached('your URI', 86400); // Duration 24h
```
**Notice:** Due to the current implementation a value greater than 86400 seconds (24 hours) will not work as the cache is purged every 24 hours automatically.
# returnError
**Notice:** Whenever possible make use of [`returnClientError`](#returnclienterror) or [`returnServerError`](#returnservererror)
The `returnError` function aborts execution of the current bridge and returns the given error message with the provided error number:
```PHP
returnError('Your error message', 404);
```
Check the [list of error codes](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes) for applicable error numbers.
# returnClientError
The `returnClientError` function aborts execution of the current bridge and returns the given error message with error code **400**:
The `returnClientError` function aborts execution of the current bridge
and returns the given error message with error code **400**:
```PHP
returnClientError('Your error message')
@ -94,10 +93,12 @@ The `returnServerError` function aborts execution of the current bridge and retu
returnServerError('Your error message')
```
Use this function when a problem occurs that has nothing to do with the parameters provided by the user. (like: Host service gone missing, empty data received, etc...)
Use this function when a problem occurs that has nothing to do with the parameters provided by the user.
(like: Host service gone missing, empty data received, etc...)
# defaultLinkTo
Automatically replaces any relative URL in a given string or DOM object (i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL.
Automatically replaces any relative URL in a given string or DOM object
(i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL.
```php
defaultLinkTo ( mixed $content, string $server ) : object

View file

@ -3,16 +3,14 @@ See `CacheInterface`.
```php
interface CacheInterface
{
public function setScope(string $scope): void;
public function get(string $key, $default = null);
public function setKey(array $key): void;
public function set(string $key, $value, int $ttl = null): void;
public function loadData();
public function delete(string $key): void;
public function saveData($data): void;
public function clear(): void;
public function getTime(): ?int;
public function purgeCache(int $seconds): void;
public function prune(): void;
}
```

View file

@ -1,4 +1,8 @@
**RSS-Bridge** is free and open source software for generating Atom or RSS feeds from websites which don't have one. It is written in PHP and intended to run on a Web server. See the [Screenshots](01_General/04_Screenshots.md) for a quick introduction to **RSS-Bridge**
RSS-Bridge is a web application.
It generates web feeds for websites that don't have one.
Officially hosted instance: https://rss-bridge.org/bridge01/
- You want to know more about **RSS-Bridge**?
Check out our **[project goals](01_General/01_Project-goals.md)**.

View file

@ -1,5 +1,9 @@
<?php
if (version_compare(\PHP_VERSION, '7.4.0') === -1) {
exit('RSS-Bridge requires minimum PHP version 7.4.0!');
}
require_once __DIR__ . '/lib/bootstrap.php';
$rssBridge = new RssBridge();

View file

@ -116,6 +116,10 @@ abstract class BridgeAbstract implements BridgeInterface
*/
private array $configuration = [];
/**
 * Empty constructor: takes no arguments and performs no work.
 */
public function __construct()
{
}
/** {@inheritdoc} */
public function getItems()
{
@ -410,15 +414,13 @@ abstract class BridgeAbstract implements BridgeInterface
/**
* Loads a cached value for the specified key
*
* @param int $timeout Cache duration (optional)
* @return mixed Cached value or null if the key doesn't exist or has expired
*/
protected function loadCacheValue(string $key, int $timeout = 86400)
protected function loadCacheValue(string $key)
{
$cache = RssBridge::getCache();
$cache->setScope($this->getShortName());
$cache->setKey([$key]);
return $cache->loadData($timeout);
$cacheKey = $this->getShortName() . '_' . $key;
return $cache->get($cacheKey);
}
/**
@ -426,12 +428,11 @@ abstract class BridgeAbstract implements BridgeInterface
*
* @param mixed $value Value to cache
*/
protected function saveCacheValue(string $key, $value)
protected function saveCacheValue(string $key, $value, $ttl = 86400)
{
$cache = RssBridge::getCache();
$cache->setScope($this->getShortName());
$cache->setKey([$key]);
$cache->saveData($value);
$cacheKey = $this->getShortName() . '_' . $key;
$cache->set($cacheKey, $value, $ttl);
}
public function getShortName(): string

View file

@ -57,6 +57,8 @@ interface BridgeInterface
{
/**
* Collects data from the site
*
* @return void
*/
public function collectData();

View file

@ -72,7 +72,29 @@ class CacheFactory
'enable_purge' => Configuration::getConfig('SQLiteCache', 'enable_purge'),
]);
case MemcachedCache::class:
return new MemcachedCache();
if (!extension_loaded('memcached')) {
throw new \Exception('"memcached" extension not loaded. Please check "php.ini"');
}
$section = 'MemcachedCache';
$host = Configuration::getConfig($section, 'host');
$port = Configuration::getConfig($section, 'port');
if (empty($host) && empty($port)) {
throw new \Exception('Configuration for ' . $section . ' missing.');
}
if (empty($host)) {
throw new \Exception('"host" param is not set for ' . $section);
}
if (empty($port)) {
throw new \Exception('"port" param is not set for ' . $section);
}
if (!ctype_digit($port)) {
throw new \Exception('"port" param is invalid for ' . $section);
}
$port = intval($port);
if ($port < 1 || $port > 65535) {
throw new \Exception('"port" param is invalid for ' . $section);
}
return new MemcachedCache($host, $port);
default:
if (!file_exists(PATH_LIB_CACHES . $className . '.php')) {
throw new \Exception('Unable to find the cache file');

View file

@ -2,15 +2,13 @@
interface CacheInterface
{
public function setScope(string $scope): void;
public function get(string $key, $default = null);
public function setKey(array $key): void;
public function set(string $key, $value, int $ttl = null): void;
public function loadData(int $timeout = 86400);
public function delete(string $key): void;
public function saveData($data): void;
public function clear(): void;
public function getTime(): ?int;
public function purgeCache(int $timeout = 86400): void;
public function prune(): void;
}

View file

@ -37,10 +37,6 @@ final class Configuration
*/
public static function verifyInstallation()
{
if (version_compare(\PHP_VERSION, '7.4.0') === -1) {
throw new \Exception('RSS-Bridge requires at least PHP version 7.4.0!');
}
$errors = [];
// OpenSSL: https://www.php.net/manual/en/book.openssl.php
@ -211,6 +207,9 @@ final class Configuration
if (!is_string(self::getConfig('error', 'output'))) {
self::throwConfigError('error', 'output', 'Is not a valid String');
}
if (!in_array(self::getConfig('error', 'output'), ['feed', 'http', 'none'])) {
self::throwConfigError('error', 'output', 'Invalid output');
}
if (
!is_numeric(self::getConfig('error', 'report_limit'))

View file

@ -100,8 +100,8 @@ abstract class FeedExpander extends BridgeAbstract
'*/*',
];
$httpHeaders = ['Accept: ' . implode(', ', $mimeTypes)];
$content = getContents($url, $httpHeaders);
if ($content === '') {
$xml = getContents($url, $httpHeaders);
if ($xml === '') {
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
}
// Maybe move this call earlier up the stack frames
@ -109,7 +109,7 @@ abstract class FeedExpander extends BridgeAbstract
libxml_use_internal_errors(true);
// Consider replacing libxml with https://www.php.net/domdocument
// Intentionally not using the silencing operator (@) because it has no effect here
$rssContent = simplexml_load_string(trim($content));
$rssContent = simplexml_load_string(trim($xml));
if ($rssContent === false) {
$xmlErrors = libxml_get_errors();
foreach ($xmlErrors as $xmlError) {

View file

@ -28,15 +28,7 @@ interface FormatInterface
*/
public function stringify();
/**
* Set items
*
* @param array $bridges The items
* @return self The format object
*
* @todo Rename parameter `$bridges` to `$items`
*/
public function setItems(array $bridges);
public function setItems(array $items);
/**
* Return items

View file

@ -66,13 +66,24 @@ final class Logger
}
}
}
// Intentionally not sanitizing $message
if ($context) {
try {
$context = Json::encode($context);
} catch (\JsonException $e) {
$context['message'] = null;
$context = Json::encode($context);
}
} else {
$context = '';
}
$text = sprintf(
"[%s] rssbridge.%s %s %s\n",
now()->format('Y-m-d H:i:s'),
$level,
// Intentionally not sanitizing $message
$message,
$context ? Json::encode($context) : ''
$context
);
// Log to stderr/stdout whatever that is
@ -81,6 +92,6 @@ final class Logger
// Log to file
// todo: extract to log handler
// file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND | LOCK_EX);
//$bytes = file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND | LOCK_EX);
}
}

View file

@ -5,25 +5,7 @@ final class RssBridge
private static HttpClient $httpClient;
private static CacheInterface $cache;
public function main(array $argv = [])
{
if ($argv) {
parse_str(implode('&', array_slice($argv, 1)), $cliArgs);
$request = $cliArgs;
} else {
$request = array_merge($_GET, $_POST);
}
try {
$this->run($request);
} catch (\Throwable $e) {
Logger::error(sprintf('Exception in RssBridge::main(): %s', create_sane_exception_message($e)), ['e' => $e]);
http_response_code(500);
print render(__DIR__ . '/../templates/error.html.php', ['e' => $e]);
}
}
private function run($request): void
public function __construct()
{
Configuration::verifyInstallation();
@ -33,6 +15,13 @@ final class RssBridge
}
Configuration::loadConfiguration($customConfig, getenv());
set_exception_handler(function (\Throwable $e) {
Logger::error('Uncaught Exception', ['e' => $e]);
http_response_code(500);
print render(__DIR__ . '/../templates/error.html.php', ['e' => $e]);
exit(1);
});
set_error_handler(function ($code, $message, $file, $line) {
if ((error_reporting() & $code) === 0) {
return false;
@ -45,7 +34,6 @@ final class RssBridge
);
Logger::warning($text);
if (Debug::isEnabled()) {
// todo: extract to log handler
print sprintf("<pre>%s</pre>\n", e($text));
}
});
@ -72,38 +60,58 @@ final class RssBridge
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
$cacheFactory = new CacheFactory();
self::$httpClient = new CurlHttpClient();
self::$cache = $cacheFactory->create();
$cacheFactory = new CacheFactory();
if (Debug::isEnabled()) {
self::$cache = $cacheFactory->create('array');
} else {
self::$cache = $cacheFactory->create();
}
if (Configuration::getConfig('authentication', 'enable')) {
$authenticationMiddleware = new AuthenticationMiddleware();
$authenticationMiddleware();
}
}
foreach ($request as $key => $value) {
if (!is_string($value)) {
throw new \Exception("Query parameter \"$key\" is not a string.");
public function main(array $argv = []): void
{
if ($argv) {
parse_str(implode('&', array_slice($argv, 1)), $cliArgs);
$request = $cliArgs;
} else {
$request = array_merge($_GET, $_POST);
}
try {
foreach ($request as $key => $value) {
if (!is_string($value)) {
throw new \Exception("Query parameter \"$key\" is not a string.");
}
}
}
$actionName = $request['action'] ?? 'Frontpage';
$actionName = strtolower($actionName) . 'Action';
$actionName = implode(array_map('ucfirst', explode('-', $actionName)));
$actionName = $request['action'] ?? 'Frontpage';
$actionName = strtolower($actionName) . 'Action';
$actionName = implode(array_map('ucfirst', explode('-', $actionName)));
$filePath = __DIR__ . '/../actions/' . $actionName . '.php';
if (!file_exists($filePath)) {
throw new \Exception(sprintf('Invalid action: %s', $actionName));
}
$className = '\\' . $actionName;
$action = new $className();
$filePath = __DIR__ . '/../actions/' . $actionName . '.php';
if (!file_exists($filePath)) {
throw new \Exception('Invalid action', 400);
}
$className = '\\' . $actionName;
$action = new $className();
$response = $action->execute($request);
if (is_string($response)) {
print $response;
} elseif ($response instanceof Response) {
$response->send();
$response = $action->execute($request);
if (is_string($response)) {
print $response;
} elseif ($response instanceof Response) {
$response->send();
}
} catch (\Throwable $e) {
Logger::error('Exception in RssBridge::main()', ['e' => $e]);
http_response_code(500);
print render(__DIR__ . '/../templates/error.html.php', ['e' => $e]);
}
}
@ -114,6 +122,12 @@ final class RssBridge
public static function getCache(): CacheInterface
{
return self::$cache;
return self::$cache ?? new NullCache();
}
/**
 * Empty the application cache.
 *
 * Looks up the cache through self::getCache() and calls clear() on it.
 */
public function clearCache()
{
    self::getCache()->clear();
}
}

View file

@ -12,11 +12,9 @@ class TwitterClient
{
$this->cache = $cache;
$cache->setScope('twitter');
$cache->setKey(['cache']);
$cache->purgeCache(60 * 60 * 3);
$data = $this->cache->get('twitter') ?? [];
$this->data = $data;
$this->data = $this->cache->loadData() ?? [];
$this->authorization = 'AAAAAAAAAAAAAAAAAAAAAGHtAgAAAAAA%2Bx7ILXNILCqkSGIzy6faIHZ9s3Q%3DQy97w6SIrzE7lQwPJEYQBsArEE2fC25caFwRBvAGi456G09vGR';
$this->tw_consumer_key = '3nVuSoBZnx6U4vzUxf5w';
$this->tw_consumer_secret = 'Bcs59EFbbsdF6Sl9Ng71smgStWEGwXXKSjYvPVt7qys';
@ -273,9 +271,7 @@ class TwitterClient
$guest_token = json_decode($response)->guest_token;
$this->data['guest_token'] = $guest_token;
$this->cache->setScope('twitter');
$this->cache->setKey(['cache']);
$this->cache->saveData($this->data);
$this->cache->set('twitter', $this->data);
}
private function fetchUserInfoByScreenName(string $screenName)
@ -299,9 +295,7 @@ class TwitterClient
$userInfo = $response->data->user;
$this->data[$screenName] = $userInfo;
$this->cache->setScope('twitter');
$this->cache->setKey(['cache']);
$this->cache->saveData($this->data);
$this->cache->set('twitter', $this->data);
return $userInfo;
}
@ -434,9 +428,7 @@ class TwitterClient
$listInfo = $response->data->user_by_screen_name->list;
$this->data[$screenName . '-' . $listSlug] = $listInfo;
$this->cache->setScope('twitter');
$this->cache->setKey(['cache']);
$this->cache->saveData($this->data);
$this->cache->set('twitter', $this->data);
return $listInfo;
}

View file

@ -39,10 +39,10 @@ const MAX_FILE_SIZE = 10000000;
// Files
$files = [
__DIR__ . '/../lib/html.php',
__DIR__ . '/../lib/error.php',
__DIR__ . '/../lib/contents.php',
__DIR__ . '/../lib/php8backports.php',
__DIR__ . '/../lib/utils.php',
__DIR__ . '/../lib/http.php',
// Vendor
__DIR__ . '/../vendor/parsedown/Parsedown.php',
__DIR__ . '/../vendor/php-urljoin/src/urljoin.php',

View file

@ -1,101 +1,11 @@
<?php
/**
 * Holds the three parts of an HTTP response: body, status code and headers.
 *
 * Headers are stored exactly as passed to the constructor and are emitted
 * as "name: value" lines by send().
 */
final class Response
{
    /** Reason phrases indexed by HTTP status code. */
    public const STATUS_CODES = [
        '100' => 'Continue',
        '101' => 'Switching Protocols',
        '200' => 'OK',
        '201' => 'Created',
        '202' => 'Accepted',
        '203' => 'Non-Authoritative Information',
        '204' => 'No Content',
        '205' => 'Reset Content',
        '206' => 'Partial Content',
        '300' => 'Multiple Choices',
        '301' => 'Moved Permanently',
        '302' => 'Found',
        '303' => 'See Other',
        '304' => 'Not Modified',
        '305' => 'Use Proxy',
        '400' => 'Bad Request',
        '401' => 'Unauthorized',
        '402' => 'Payment Required',
        '403' => 'Forbidden',
        '404' => 'Not Found',
        '405' => 'Method Not Allowed',
        '406' => 'Not Acceptable',
        '407' => 'Proxy Authentication Required',
        '408' => 'Request Timeout',
        '409' => 'Conflict',
        '410' => 'Gone',
        '411' => 'Length Required',
        '412' => 'Precondition Failed',
        '413' => 'Request Entity Too Large',
        '414' => 'Request-URI Too Long',
        '415' => 'Unsupported Media Type',
        '416' => 'Requested Range Not Satisfiable',
        '417' => 'Expectation Failed',
        '429' => 'Too Many Requests',
        '500' => 'Internal Server Error',
        '501' => 'Not Implemented',
        '502' => 'Bad Gateway',
        '503' => 'Service Unavailable',
        '504' => 'Gateway Timeout',
        '505' => 'HTTP Version Not Supported'
    ];

    private string $body;
    private int $code;
    private array $headers;

    /**
     * @param string $body    Response payload
     * @param int    $code    HTTP status code
     * @param array  $headers Map of header name => header value
     */
    public function __construct(string $body = '', int $code = 200, array $headers = [])
    {
        $this->headers = $headers;
        $this->code = $code;
        $this->body = $body;
    }

    /** The response payload. */
    public function getBody()
    {
        return $this->body;
    }

    /** The HTTP status code. */
    public function getCode()
    {
        return $this->code;
    }

    /** The header map exactly as given to the constructor. */
    public function getHeaders()
    {
        return $this->headers;
    }

    /**
     * Emit the response: status code first, then every header, then the body.
     */
    public function send(): void
    {
        http_response_code($this->code);
        foreach (array_keys($this->headers) as $headerName) {
            $headerLine = sprintf('%s: %s', $headerName, $this->headers[$headerName]);
            header($headerLine);
        }
        print $this->body;
    }
}
/**
* Fetch data from an http url
*
* @param array $httpHeaders E.g. ['Content-type: text/plain']
* @param array $curlOptions Associative array e.g. [CURLOPT_MAXREDIRS => 3]
* @param bool $returnFull Whether to return an array:
* [
* 'code' => int,
* 'header' => array,
* 'content' => string,
* 'status_lines' => array,
* ]
* @param bool $returnFull Whether to return an array: ['code' => int, 'headers' => array, 'content' => string]
* @return string|array
*/
function getContents(
@ -142,30 +52,35 @@ function getContents(
}
$cache = RssBridge::getCache();
$cache->setScope('server');
$cache->setKey([$url]);
$cacheKey = 'server_' . $url;
if (!Debug::isEnabled() && $cache->getTime() && $cache->loadData(86400 * 7)) {
$config['if_not_modified_since'] = $cache->getTime();
/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
// considering popping
$cachedLastModified = $cachedResponse->getHeader('last-modified');
if ($cachedLastModified) {
$cachedLastModified = new \DateTimeImmutable($cachedLastModified);
$config['if_not_modified_since'] = $cachedLastModified->getTimestamp();
}
}
$response = $httpClient->request($url, $config);
switch ($response['code']) {
switch ($response->getCode()) {
case 200:
case 201:
case 202:
if (isset($response['headers']['cache-control'])) {
$cachecontrol = $response['headers']['cache-control'];
$lastValue = array_pop($cachecontrol);
$directives = explode(',', $lastValue);
$cacheControl = $response->getHeader('cache-control');
if ($cacheControl) {
$directives = explode(',', $cacheControl);
$directives = array_map('trim', $directives);
if (in_array('no-cache', $directives) || in_array('no-store', $directives)) {
// Don't cache as instructed by the server
break;
}
}
$cache->saveData($response['body']);
$cache->set($cacheKey, $response, 86400 * 10);
break;
case 301:
case 302:
@ -174,16 +89,16 @@ function getContents(
break;
case 304:
// Not Modified
$response['body'] = $cache->loadData(86400 * 7);
$response = $response->withBody($cachedResponse->getBody());
break;
default:
$exceptionMessage = sprintf(
'%s resulted in %s %s %s',
$url,
$response['code'],
Response::STATUS_CODES[$response['code']] ?? '',
$response->getCode(),
$response->getStatusLine(),
// If debug, include a part of the response body in the exception message
Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '',
Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '',
);
// The following code must be extracted if it grows too much
@ -194,141 +109,21 @@ function getContents(
'<title>Security | Glassdoor',
];
foreach ($cloudflareTitles as $cloudflareTitle) {
if (str_contains($response['body'], $cloudflareTitle)) {
throw new CloudFlareException($exceptionMessage, $response['code']);
if (str_contains($response->getBody(), $cloudflareTitle)) {
throw new CloudFlareException($exceptionMessage, $response->getCode());
}
}
throw new HttpException(trim($exceptionMessage), $response['code']);
throw new HttpException(trim($exceptionMessage), $response->getCode());
}
if ($returnFull === true) {
// For legacy reasons, use content instead of body
$response['content'] = $response['body'];
unset($response['body']);
return $response;
}
return $response['body'];
}
/**
 * Contract for a component that performs an HTTP request.
 */
interface HttpClient
{
/**
 * Perform a request against $url.
 *
 * @param string $url    Target URL
 * @param array  $config Per-request options; the accepted keys are defined by the implementation
 * @return array Response data; the exact keys are defined by the implementation
 */
public function request(string $url, array $config = []): array;
}
final class CurlHttpClient implements HttpClient
{
public function request(string $url, array $config = []): array
{
$defaults = [
'useragent' => null,
'timeout' => 5,
'headers' => [],
'proxy' => null,
'curl_options' => [],
'if_not_modified_since' => null,
'retries' => 3,
'max_filesize' => null,
'max_redirections' => 5,
];
$config = array_merge($defaults, $config);
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
curl_setopt($ch, CURLOPT_HEADER, false);
$httpHeaders = [];
foreach ($config['headers'] as $name => $value) {
$httpHeaders[] = sprintf('%s: %s', $name, $value);
}
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
if ($config['useragent']) {
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
}
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
if ($config['max_filesize']) {
// This option inspects the Content-Length header
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
// This progress function will monitor responses who omit the Content-Length header
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
if ($downloaded > $config['max_filesize']) {
// Return a non-zero value to abort the transfer
return -1;
}
return 0;
});
}
if ($config['proxy']) {
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
}
if (curl_setopt_array($ch, $config['curl_options']) === false) {
throw new \Exception('Tried to set an illegal curl option');
}
if ($config['if_not_modified_since']) {
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
$responseStatusLines = [];
$responseHeaders = [];
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
$len = strlen($rawHeader);
if ($rawHeader === "\r\n") {
return $len;
}
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
$responseStatusLines[] = $rawHeader;
return $len;
}
$header = explode(':', $rawHeader);
if (count($header) === 1) {
return $len;
}
$name = mb_strtolower(trim($header[0]));
$value = trim(implode(':', array_slice($header, 1)));
if (!isset($responseHeaders[$name])) {
$responseHeaders[$name] = [];
}
$responseHeaders[$name][] = $value;
return $len;
});
$attempts = 0;
while (true) {
$attempts++;
$data = curl_exec($ch);
if ($data !== false) {
// The network call was successful, so break out of the loop
break;
}
if ($attempts > $config['retries']) {
// Finally give up
$curl_error = curl_error($ch);
$curl_errno = curl_errno($ch);
throw new HttpException(sprintf(
'cURL error %s: %s (%s) for %s',
$curl_error,
$curl_errno,
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
$url
));
}
}
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
return [
'code' => $statusCode,
'status_lines' => $responseStatusLines,
'headers' => $responseHeaders,
'body' => $data,
'code' => $response->getCode(),
'headers' => $response->getHeaders(),
// For legacy reasons, use 'content' instead of 'body'
'content' => $response->getBody(),
];
}
return $response->getBody();
}
/**
@ -391,7 +186,7 @@ function getSimpleHTMLDOM(
* _Notice_: Cached contents are forcefully removed after 24 hours (86400 seconds).
*
* @param string $url The URL.
* @param int $timeout Cache duration in seconds.
* @param int $ttl Cache duration in seconds.
* @param array $header (optional) A list of cURL header.
* For more information follow the links below.
* * https://php.net/manual/en/function.curl-setopt.php
@ -416,7 +211,7 @@ function getSimpleHTMLDOM(
*/
function getSimpleHTMLDOMCached(
$url,
$timeout = 86400,
$ttl = 86400,
$header = [],
$opts = [],
$lowercase = true,
@ -427,14 +222,11 @@ function getSimpleHTMLDOMCached(
$defaultSpanText = DEFAULT_SPAN_TEXT
) {
$cache = RssBridge::getCache();
$cache->setScope('pages');
$cache->setKey([$url]);
$content = $cache->loadData($timeout);
if (!$content || Debug::isEnabled()) {
$cacheKey = 'pages_' . $url;
$content = $cache->get($cacheKey);
if (!$content) {
$content = getContents($url, $header ?? [], $opts ?? []);
$cache->setScope('pages');
$cache->setKey([$url]);
$cache->saveData($content);
$cache->set($cacheKey, $content, $ttl);
}
return str_get_html(
$content,

View file

@ -1,47 +0,0 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* Always throws a generic \Exception carrying the given message and code.
*
* Despite the name, nothing is returned and no HTTP response is emitted
* here; the function only throws.
*
* @throws \Exception when called
* @param string $message The error message
* @param int $code The HTTP error code, used as the exception code
* @link https://en.wikipedia.org/wiki/List_of_HTTP_status_codes List of HTTP
* status codes
*/
function returnError($message, $code)
{
throw new \Exception($message, $code);
}
/**
* Throws an \Exception with code 400 (HTTP "Bad Request") when called.
*
* Delegates to returnError(), so it never returns normally.
*
* @throws \Exception when called
* @param string $message The error message
*/
function returnClientError($message)
{
returnError($message, 400);
}
/**
* Throws an \Exception with code 500 (HTTP "Internal Server Error") when called.
*
* Delegates to returnError(), so it never returns normally.
*
* @throws \Exception when called
* @param string $message The error message
*/
function returnServerError($message)
{
returnError($message, 500);
}

252
lib/http.php Normal file
View file

@ -0,0 +1,252 @@
<?php
/**
 * Exception type for failed HTTP requests.
 *
 * CurlHttpClient::request() throws it when the cURL transfer still fails
 * after the configured number of retries.
 */
class HttpException extends \Exception
{
}
/**
 * HttpException subtype with no behaviour of its own.
 *
 * NOTE(review): presumably raised when a response body looks like a
 * Cloudflare challenge page rather than real content - confirm at the
 * throw sites.
 */
final class CloudFlareException extends HttpException
{
}
/**
 * Contract for a component that performs an HTTP request.
 */
interface HttpClient
{
/**
 * Perform a request against $url.
 *
 * @param string $url    Target URL
 * @param array  $config Per-request options; the accepted keys are defined by the implementation
 * @return Response The response that was received
 */
public function request(string $url, array $config = []): Response;
}
/**
 * HttpClient implementation backed by the PHP cURL extension.
 */
final class CurlHttpClient implements HttpClient
{
    /**
     * Perform an HTTP(S) request, following redirects, and return the response.
     *
     * Recognised $config keys (all optional):
     *  - useragent             string|null  User-Agent header value
     *  - timeout               int          CURLOPT_TIMEOUT in seconds (default 5)
     *  - headers               array        request headers as name => value
     *  - proxy                 string|null  CURLOPT_PROXY value
     *  - curl_options          array        raw CURLOPT_* => value pairs, applied after the
     *                                       options above so they can override them
     *  - if_not_modified_since int|null     timestamp for a conditional (If-Modified-Since) request
     *  - retries               int          extra attempts after the first failed transfer (default 3)
     *  - max_filesize          int|null     abort when the download grows beyond this many bytes
     *  - max_redirections      int          CURLOPT_MAXREDIRS (default 5)
     *
     * Response header names are lower-cased. Headers of every response in a
     * redirect chain are collected, in order of arrival.
     *
     * @param string $url    Target URL
     * @param array  $config See the list above
     * @return Response Body, status code and headers of the final response
     * @throws \Exception    when an entry of curl_options cannot be applied
     * @throws HttpException when the transfer still fails after all retries
     */
    public function request(string $url, array $config = []): Response
    {
        $defaults = [
            'useragent' => null,
            'timeout' => 5,
            'headers' => [],
            'proxy' => null,
            'curl_options' => [],
            'if_not_modified_since' => null,
            'retries' => 3,
            'max_filesize' => null,
            'max_redirections' => 5,
        ];
        $config = array_merge($defaults, $config);

        $responseHeaders = [];
        $ch = curl_init($url);
        try {
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
            curl_setopt($ch, CURLOPT_HEADER, false);

            $httpHeaders = [];
            foreach ($config['headers'] as $name => $value) {
                $httpHeaders[] = sprintf('%s: %s', $name, $value);
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);

            if ($config['useragent']) {
                curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
            }
            curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
            // An empty string enables every encoding supported by the cURL build
            curl_setopt($ch, CURLOPT_ENCODING, '');
            curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

            if ($config['max_filesize']) {
                // This option inspects the Content-Length header
                curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
                curl_setopt($ch, CURLOPT_NOPROGRESS, false);
                // This progress function will monitor responses who omit the Content-Length header
                curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, static function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
                    if ($downloaded > $config['max_filesize']) {
                        // Return a non-zero value to abort the transfer
                        return -1;
                    }
                    return 0;
                });
            }

            if ($config['proxy']) {
                curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
            }
            if (curl_setopt_array($ch, $config['curl_options']) === false) {
                throw new \Exception('Tried to set an illegal curl option');
            }

            if ($config['if_not_modified_since']) {
                curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
                curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
            }

            curl_setopt($ch, CURLOPT_HEADERFUNCTION, static function ($ch, $rawHeader) use (&$responseHeaders) {
                // cURL requires the callback to return the number of bytes it handled
                $len = strlen($rawHeader);
                if ($rawHeader === "\r\n") {
                    return $len;
                }
                if (preg_match('#^HTTP/(2|1\.1|1\.0)#', $rawHeader)) {
                    // Status line, not a header field
                    return $len;
                }
                // Split on the first colon only, so values containing ':' stay intact
                $header = explode(':', $rawHeader, 2);
                if (count($header) === 1) {
                    return $len;
                }
                $name = mb_strtolower(trim($header[0]));
                $responseHeaders[$name][] = trim($header[1]);
                return $len;
            });

            $attempts = 0;
            while (true) {
                $attempts++;
                // Headers captured during a previous, failed attempt must not leak into this one
                $responseHeaders = [];
                $data = curl_exec($ch);
                if ($data !== false) {
                    // The network call was successful, so break out of the loop
                    break;
                }
                if ($attempts > $config['retries']) {
                    // Finally give up; the message reads "cURL error <errno>: <description>"
                    throw new HttpException(sprintf(
                        'cURL error %s: %s (%s) for %s',
                        curl_errno($ch),
                        curl_error($ch),
                        'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
                        $url
                    ));
                }
            }

            $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        } finally {
            // Release the handle on success and on every failure path alike
            curl_close($ch);
        }

        return new Response($data, $statusCode, $responseHeaders);
    }
}
/**
 * An HTTP response: body, status code and headers.
 *
 * Header names are stored lower-cased and every header maps to a list of
 * values, so repeated headers (e.g. Set-Cookie) are preserved.
 */
final class Response
{
    /** Reason phrases indexed by HTTP status code. */
    public const STATUS_CODES = [
        '100' => 'Continue',
        '101' => 'Switching Protocols',
        '200' => 'OK',
        '201' => 'Created',
        '202' => 'Accepted',
        '203' => 'Non-Authoritative Information',
        '204' => 'No Content',
        '205' => 'Reset Content',
        '206' => 'Partial Content',
        '300' => 'Multiple Choices',
        '301' => 'Moved Permanently',
        '302' => 'Found',
        '303' => 'See Other',
        '304' => 'Not Modified',
        '305' => 'Use Proxy',
        '400' => 'Bad Request',
        '401' => 'Unauthorized',
        '402' => 'Payment Required',
        '403' => 'Forbidden',
        '404' => 'Not Found',
        '405' => 'Method Not Allowed',
        '406' => 'Not Acceptable',
        '407' => 'Proxy Authentication Required',
        '408' => 'Request Timeout',
        '409' => 'Conflict',
        '410' => 'Gone',
        '411' => 'Length Required',
        '412' => 'Precondition Failed',
        '413' => 'Request Entity Too Large',
        '414' => 'Request-URI Too Long',
        '415' => 'Unsupported Media Type',
        '416' => 'Requested Range Not Satisfiable',
        '417' => 'Expectation Failed',
        '429' => 'Too Many Requests',
        '500' => 'Internal Server Error',
        '501' => 'Not Implemented',
        '502' => 'Bad Gateway',
        '503' => 'Service Unavailable',
        '504' => 'Gateway Timeout',
        '505' => 'HTTP Version Not Supported'
    ];

    private string $body;
    private int $code;
    private array $headers;

    /**
     * @param string $body    Response payload
     * @param int    $code    HTTP status code
     * @param array  $headers Map of header name => value or list of values.
     *                        Names are lower-cased; names differing only in
     *                        case are merged into one list. Int and float
     *                        values are stored as strings. Values of any
     *                        other type are ignored.
     */
    public function __construct(
        string $body = '',
        int $code = 200,
        array $headers = []
    ) {
        $this->body = $body;
        $this->code = $code;
        $this->headers = [];

        foreach ($headers as $name => $value) {
            $name = mb_strtolower((string) $name);
            if (!isset($this->headers[$name])) {
                $this->headers[$name] = [];
            }
            if (is_string($value) || is_int($value) || is_float($value)) {
                $this->headers[$name][] = (string) $value;
            } elseif (is_array($value)) {
                // Append instead of overwrite so values given under another casing survive
                $this->headers[$name] = array_merge($this->headers[$name], array_values($value));
            }
        }
    }

    /** The response payload. */
    public function getBody(): string
    {
        return $this->body;
    }

    /** The HTTP status code. */
    public function getCode(): int
    {
        return $this->code;
    }

    /**
     * The reason phrase for the status code (e.g. "Not Found"), or an empty
     * string when the code is not listed in STATUS_CODES.
     */
    public function getStatusLine(): string
    {
        return self::STATUS_CODES[$this->code] ?? '';
    }

    /**
     * All headers as lower-cased name => list of values.
     */
    public function getHeaders(): array
    {
        return $this->headers;
    }

    /**
     * Look up a header by case-insensitive name.
     *
     * @param string $name Header name
     * @param bool   $all  When true return every value, otherwise only the last one
     * @return string[]|string|null Null when the header is absent or has no values
     */
    public function getHeader(string $name, bool $all = false)
    {
        $name = mb_strtolower($name);
        $header = $this->headers[$name] ?? null;
        if (!$header) {
            return null;
        }
        if ($all) {
            return $header;
        }
        return array_pop($header);
    }

    /**
     * Return a copy of this response with a different body; the original is left untouched.
     */
    public function withBody(string $body): Response
    {
        $clone = clone $this;
        $clone->body = $body;
        return $clone;
    }

    /**
     * Emit the response: status code first, then every header value, then the body.
     */
    public function send(): void
    {
        http_response_code($this->code);
        foreach ($this->headers as $name => $values) {
            // The first value replaces any header of that name set earlier.
            // Further values are appended, because header() would otherwise
            // keep only the last value of a repeated header.
            $replace = true;
            foreach ($values as $value) {
                header(sprintf('%s: %s', $name, $value), $replace);
                $replace = false;
            }
        }
        print $this->body;
    }
}

View file

@ -1,18 +1,17 @@
<?php
class HttpException extends \Exception
{
}
final class CloudFlareException extends HttpException
{
}
// https://github.com/nette/utils/blob/master/src/Utils/Json.php
final class Json
{
public static function encode($value): string
public static function encode($value, $pretty = true, bool $asciiSafe = false): string
{
$flags = JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;
$flags = JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES;
if (!$asciiSafe) {
$flags = $flags | JSON_UNESCAPED_UNICODE;
}
if ($pretty) {
$flags = $flags | JSON_PRETTY_PRINT;
}
return \json_encode($value, $flags);
}
@ -237,3 +236,13 @@ function create_random_string(int $bytes = 16): string
{
return bin2hex(openssl_random_pseudo_bytes($bytes));
}
/**
 * Always throws a generic \Exception with code 400 (HTTP "Bad Request").
 *
 * Despite the name, nothing is returned.
 *
 * @param string $message The error message
 * @throws \Exception when called
 */
function returnClientError($message)
{
throw new \Exception($message, 400);
}
/**
 * Always throws a generic \Exception with code 500 (HTTP "Internal Server Error").
 *
 * Despite the name, nothing is returned.
 *
 * @param string $message The error message
 * @throws \Exception when called
 */
function returnServerError($message)
{
throw new \Exception($message, 500);
}

View file

@ -17,7 +17,8 @@ class ListActionTest extends TestCase
$action = new \ListAction();
$response = $action->execute([]);
$headers = $response->getHeaders();
$this->assertSame($headers['Content-Type'], 'application/json');
$contentType = $response->getHeader('content-type');
$this->assertSame($contentType, 'application/json');
}
public function testOutput()

View file

@ -27,17 +27,13 @@ class CacheTest extends TestCase
'path' => $temporaryFolder,
'enable_purge' => true,
]);
$sut->setScope('scope');
$sut->purgeCache(-1);
$sut->setKey(['key']);
$sut->clear();
$this->assertNull($sut->getTime());
$this->assertNull($sut->loadData());
$this->assertNull($sut->get('key'));
$sut->saveData('data');
$this->assertSame('data', $sut->loadData());
$this->assertIsNumeric($sut->getTime());
$sut->purgeCache(-1);
$sut->set('key', 'data', 5);
$this->assertSame('data', $sut->get('key'));
$sut->clear();
// Intentionally not deleting the temp folder
}