mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 01:25:28 +03:00
feat: add retry logic to the http client (#2692)
* refactor: extract http client * feat: add retry logic to http client
This commit is contained in:
parent
0c7a7f320f
commit
5d77d14f9d
3 changed files with 139 additions and 317 deletions
|
@ -610,8 +610,8 @@ EOD;
|
|||
|
||||
try {
|
||||
$result = getContents($uri, $this->authHeaders, array(), true);
|
||||
} catch (UnexpectedResponseException $e) {
|
||||
switch ($e->getResponseCode()) {
|
||||
} catch (HttpException $e) {
|
||||
switch ($e->getCode()) {
|
||||
case 401:
|
||||
case 403:
|
||||
if ($retries) {
|
||||
|
@ -621,8 +621,8 @@ EOD;
|
|||
continue 2;
|
||||
}
|
||||
default:
|
||||
$code = $e->getResponseCode();
|
||||
$data = $e->getResponseBody();
|
||||
$code = $e->getCode();
|
||||
$data = $e->getMessage();
|
||||
returnServerError(<<<EOD
|
||||
Failed to make api call: $api
|
||||
HTTP Status: $code
|
||||
|
|
|
@ -1,36 +1,26 @@
|
|||
<?php
|
||||
/* Generate the "Contributors" list for README.md automatically utilizing the GitHub API */
|
||||
|
||||
require __DIR__ . '/../../lib/rssbridge.php';
|
||||
|
||||
$url = 'https://api.github.com/repos/rss-bridge/rss-bridge/contributors';
|
||||
$contributors = array();
|
||||
$next = true;
|
||||
|
||||
while($next) { /* Collect all contributors */
|
||||
|
||||
$c = curl_init();
|
||||
$headers = [
|
||||
'Accept: application/json',
|
||||
'Content-Type: application/json',
|
||||
'User-Agent: RSS-Bridge'
|
||||
];
|
||||
$result = _http_request($url, ['headers' => $headers]);
|
||||
|
||||
curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
|
||||
curl_setopt($c, CURLOPT_HTTPHEADER, array(
|
||||
'Accept: application/json',
|
||||
'Content-Type: application/json',
|
||||
'User-Agent: RSS-Bridge'
|
||||
));
|
||||
curl_setopt($c, CURLOPT_URL, $url);
|
||||
curl_setopt($c, CURLOPT_HEADER, true);
|
||||
|
||||
$data = curl_exec($c);
|
||||
|
||||
$headerSize = curl_getinfo($c, CURLINFO_HEADER_SIZE);
|
||||
$header = substr($data, 0, $headerSize);
|
||||
$headers = parseResponseHeader($header);
|
||||
|
||||
curl_close($c);
|
||||
|
||||
foreach(json_decode(substr($data, $headerSize)) as $contributor)
|
||||
foreach(json_decode($result['body']) as $contributor)
|
||||
$contributors[] = $contributor;
|
||||
|
||||
// Extract links to "next", "last", etc...
|
||||
$links = explode(',', $headers[0]['link']);
|
||||
$links = explode(',', $result['headers']['link'][0]);
|
||||
$next = false;
|
||||
|
||||
// Check if there is a link with 'rel="next"'
|
||||
|
@ -57,38 +47,3 @@ usort($contributors, function($a, $b){
|
|||
foreach($contributors as $contributor) {
|
||||
echo " * [{$contributor->login}]({$contributor->html_url})\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the provided response header into an associative array
|
||||
*
|
||||
* Based on https://stackoverflow.com/a/18682872
|
||||
*/
|
||||
function parseResponseHeader($header) {
|
||||
|
||||
$headers = array();
|
||||
$requests = explode("\r\n\r\n", trim($header));
|
||||
|
||||
foreach ($requests as $request) {
|
||||
|
||||
$header = array();
|
||||
|
||||
foreach (explode("\r\n", $request) as $i => $line) {
|
||||
|
||||
if($i === 0) {
|
||||
$header['http_code'] = $line;
|
||||
} else {
|
||||
|
||||
list ($key, $value) = explode(': ', $line);
|
||||
$header[$key] = $value;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
$headers[] = $header;
|
||||
|
||||
}
|
||||
|
||||
return $headers;
|
||||
|
||||
}
|
||||
|
|
383
lib/contents.php
383
lib/contents.php
|
@ -1,244 +1,145 @@
|
|||
<?php
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
final class HttpException extends \Exception {}
|
||||
|
||||
/**
|
||||
* Exception class to handle all errors, when executing getContents
|
||||
*/
|
||||
class GetContentsException extends \Exception {
|
||||
public function __construct($details, $code = 0, Throwable $previous = null) {
|
||||
$message = trim($this->getMessageHeading() . "\n$details");
|
||||
|
||||
$lastError = error_get_last();
|
||||
if($lastError !== null)
|
||||
$message .= "\nLast PHP Error: " . $lastError['message'];
|
||||
|
||||
parent::__construct($message, $code, $previous);
|
||||
}
|
||||
|
||||
protected function getMessageHeading() {
|
||||
return 'Could not get contents';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Exception class to handle HTTP responses with Cloudflare challenges
|
||||
**/
|
||||
class CloudflareChallengeException extends \Exception {
|
||||
public function __construct($code = 0, Throwable $previous = null) {
|
||||
$message = <<<EOD
|
||||
The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge!
|
||||
If this error persists longer than a week, please consider opening an issue on GitHub!
|
||||
EOD;
|
||||
|
||||
parent::__construct($message, $code, $previous);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Exception class to handle non-20x HTTP responses
|
||||
**/
|
||||
class UnexpectedResponseException extends \GetContentsException {
|
||||
private $responseCode;
|
||||
private $responseHeaders;
|
||||
private $responseBody;
|
||||
|
||||
protected function getMessageHeading() {
|
||||
return 'Unexpected response from upstream';
|
||||
}
|
||||
|
||||
public function __construct($responseBody, $responseHeaders, $responseCode = 500, Throwable $previous = null) {
|
||||
$this->responseCode = $responseCode;
|
||||
$this->responseHeaders = $responseHeaders;
|
||||
$this->responseBody = $responseBody;
|
||||
|
||||
parent::__construct('', $responseCode, $previous);
|
||||
}
|
||||
|
||||
public function getResponseCode() {
|
||||
return $this->responseCode;
|
||||
}
|
||||
|
||||
public function getResponseHeaders() {
|
||||
return $this->responseHeaders;
|
||||
}
|
||||
|
||||
public function getResponseBody() {
|
||||
return $this->responseBody;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets contents from the Internet.
|
||||
*
|
||||
* **Content caching** (disabled in debug mode)
|
||||
*
|
||||
* A copy of the received content is stored in a local cache folder `server/` at
|
||||
* {@see PATH_CACHE}. The `If-Modified-Since` header is added to the request, if
|
||||
* the provided URL has been cached before.
|
||||
*
|
||||
* When the server responds with `304 Not Modified`, the cached data is returned.
|
||||
* This will improve response times and reduce bandwidth for servers that support
|
||||
* the `If-Modified-Since` header.
|
||||
*
|
||||
* Cached files are forcefully removed after 24 hours.
|
||||
*
|
||||
* @link https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
|
||||
* If-Modified-Since
|
||||
*
|
||||
* @param string $url The URL.
|
||||
* @param array $header (optional) A list of cURL header.
|
||||
* For more information follow the links below.
|
||||
* * https://php.net/manual/en/function.curl-setopt.php
|
||||
* * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html
|
||||
* @param array $opts (optional) A list of cURL options as associative array in
|
||||
* the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX`
|
||||
* option and `$value` the corresponding value.
|
||||
* @param bool $returnHeader Returns an array of two elements 'header' and
|
||||
* 'content' if enabled.
|
||||
*
|
||||
* For more information see http://php.net/manual/en/function.curl-setopt.php
|
||||
* @return string|array The contents.
|
||||
*/
|
||||
function getContents($url, $header = array(), $opts = array(), $returnHeader = false){
|
||||
Debug::log('Reading contents from "' . $url . '"');
|
||||
|
||||
// Initialize cache
|
||||
$cacheFac = new CacheFactory();
|
||||
$cacheFac->setWorkingDir(PATH_LIB_CACHES);
|
||||
$cache = $cacheFac->create(Configuration::getConfig('cache', 'type'));
|
||||
function getContents(
|
||||
string $url,
|
||||
array $httpHeaders = [],
|
||||
array $curlOptions = [],
|
||||
bool $returnHeader = false
|
||||
) {
|
||||
$cacheFactory = new CacheFactory();
|
||||
$cacheFactory->setWorkingDir(PATH_LIB_CACHES);
|
||||
$cache = $cacheFactory->create(Configuration::getConfig('cache', 'type'));
|
||||
$cache->setScope('server');
|
||||
$cache->purgeCache(86400); // 24 hours (forced)
|
||||
$cache->setKey([$url]);
|
||||
|
||||
$params = array($url);
|
||||
$cache->setKey($params);
|
||||
$config = [
|
||||
'headers' => $httpHeaders,
|
||||
'curl_options' => $curlOptions,
|
||||
];
|
||||
if (defined('PROXY_URL') && !defined('NOPROXY')) {
|
||||
$config['proxy'] = PROXY_URL;
|
||||
}
|
||||
if(!Debug::isEnabled() && $cache->getTime()) {
|
||||
$config['if_not_modified_since'] = $cache->getTime();
|
||||
}
|
||||
|
||||
$retVal = array(
|
||||
'header' => '',
|
||||
'content' => '',
|
||||
);
|
||||
$result = _http_request($url, $config);
|
||||
$response = [
|
||||
'header' => $result['headers'],
|
||||
'content' => $result['body'],
|
||||
];
|
||||
|
||||
switch($result['code']) {
|
||||
case 200:
|
||||
case 201:
|
||||
case 202:
|
||||
if(isset($result['headers']['cache-control'])) {
|
||||
$cachecontrol = $result['headers']['cache-control'];
|
||||
$lastValue = array_pop($cachecontrol);
|
||||
$directives = explode(',', $lastValue);
|
||||
$directives = array_map('trim', $directives);
|
||||
if(in_array('no-cache', $directives) || in_array('no-store', $directives)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
$cache->saveData($result['body']);
|
||||
break;
|
||||
case 304: // Not Modified
|
||||
$response['content'] = $cache->loadData();
|
||||
break;
|
||||
default:
|
||||
throw new HttpException('', $result['code']);
|
||||
}
|
||||
if ($returnHeader === true) {
|
||||
return $response;
|
||||
}
|
||||
return $response['content'];
|
||||
}
|
||||
|
||||
/**
|
||||
* Private function used internally
|
||||
*
|
||||
* Fetch content from url
|
||||
*
|
||||
* @throws HttpException
|
||||
*/
|
||||
function _http_request(string $url, array $config = []): array
|
||||
{
|
||||
$defaults = [
|
||||
'useragent' => Configuration::getConfig('http', 'useragent'),
|
||||
'timeout' => Configuration::getConfig('http', 'timeout'),
|
||||
'headers' => [],
|
||||
'proxy' => null,
|
||||
'curl_options' => [],
|
||||
'if_not_modified_since' => null,
|
||||
'retries' => 3,
|
||||
];
|
||||
$config = array_merge($defaults, $config);
|
||||
|
||||
$ch = curl_init($url);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
|
||||
if(is_array($header) && count($header) !== 0) {
|
||||
|
||||
Debug::log('Setting headers: ' . json_encode($header));
|
||||
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
|
||||
|
||||
}
|
||||
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, Configuration::getConfig('http', 'useragent'));
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, Configuration::getConfig('http', 'timeout'));
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
|
||||
curl_setopt($ch, CURLOPT_HEADER, false);
|
||||
curl_setopt($ch, CURLOPT_HTTPHEADER, $config['headers']);
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
|
||||
curl_setopt($ch, CURLOPT_ENCODING, '');
|
||||
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
||||
|
||||
if(is_array($opts) && count($opts) !== 0) {
|
||||
|
||||
Debug::log('Setting options: ' . json_encode($opts));
|
||||
|
||||
foreach($opts as $key => $value) {
|
||||
curl_setopt($ch, $key, $value);
|
||||
}
|
||||
if($config['proxy']) {
|
||||
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
|
||||
}
|
||||
|
||||
if(defined('PROXY_URL') && !defined('NOPROXY')) {
|
||||
|
||||
Debug::log('Setting proxy url: ' . PROXY_URL);
|
||||
curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
|
||||
|
||||
foreach($config['curl_options'] as $key => $value) {
|
||||
curl_setopt($ch, $key, $value);
|
||||
}
|
||||
|
||||
// We always want the response header as part of the data!
|
||||
curl_setopt($ch, CURLOPT_HEADER, true);
|
||||
|
||||
// Build "If-Modified-Since" header
|
||||
if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist
|
||||
Debug::log('Adding If-Modified-Since');
|
||||
curl_setopt($ch, CURLOPT_TIMEVALUE, $time);
|
||||
if ($config['if_not_modified_since']) {
|
||||
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
|
||||
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
|
||||
}
|
||||
|
||||
// Enables logging for the outgoing header
|
||||
curl_setopt($ch, CURLINFO_HEADER_OUT, true);
|
||||
$responseHeaders = [];
|
||||
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders) {
|
||||
$len = strlen($rawHeader);
|
||||
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader) || $rawHeader === "\r\n") {
|
||||
return $len;
|
||||
}
|
||||
$header = explode(':', $rawHeader);
|
||||
if (count($header) === 1) {
|
||||
return $len;
|
||||
}
|
||||
$name = mb_strtolower(trim($header[0]));
|
||||
$value = trim(implode(':', array_slice($header, 1)));
|
||||
if (!isset($responseHeaders[$name])) {
|
||||
$responseHeaders[$name] = [];
|
||||
}
|
||||
$responseHeaders[$name][] = $value;
|
||||
return $len;
|
||||
});
|
||||
|
||||
$data = curl_exec($ch);
|
||||
$errorCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
|
||||
$curlError = curl_error($ch);
|
||||
$curlErrno = curl_errno($ch);
|
||||
$curlInfo = curl_getinfo($ch);
|
||||
|
||||
Debug::log('Outgoing header: ' . json_encode($curlInfo));
|
||||
|
||||
if($data === false)
|
||||
Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
|
||||
|
||||
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
|
||||
$header = substr($data, 0, $headerSize);
|
||||
$retVal['header'] = $header;
|
||||
|
||||
Debug::log('Response header: ' . $header);
|
||||
|
||||
$headers = parseResponseHeader($header);
|
||||
$finalHeader = end($headers);
|
||||
|
||||
curl_close($ch);
|
||||
|
||||
$finalHeader = array_change_key_case($finalHeader, CASE_LOWER);
|
||||
|
||||
switch($errorCode) {
|
||||
case 200: // Contents OK
|
||||
case 201: // Contents Created
|
||||
case 202: // Contents Accepted
|
||||
Debug::log('New contents received');
|
||||
$data = substr($data, $headerSize);
|
||||
// Disable caching if the server responds with "Cache-Control: no-cache"
|
||||
// or "Cache-Control: no-store"
|
||||
|
||||
if(array_key_exists('cache-control', $finalHeader)) {
|
||||
Debug::log('Server responded with "Cache-Control" header');
|
||||
$directives = explode(',', $finalHeader['cache-control']);
|
||||
$directives = array_map('trim', $directives);
|
||||
if(in_array('no-cache', $directives)
|
||||
|| in_array('no-store', $directives)) { // Skip caching
|
||||
Debug::log('Skip server side caching');
|
||||
$retVal['content'] = $data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Debug::log('Store response to cache');
|
||||
$cache->saveData($data);
|
||||
$retVal['content'] = $data;
|
||||
$attempts = 0;
|
||||
while(true) {
|
||||
$attempts++;
|
||||
$data = curl_exec($ch);
|
||||
if ($data !== false) {
|
||||
// The network call was successful, so break out of the loop
|
||||
break;
|
||||
case 304: // Not modified, use cached data
|
||||
Debug::log('Contents not modified on host, returning cached data');
|
||||
$retVal['content'] = $cache->loadData();
|
||||
break;
|
||||
default:
|
||||
if(array_key_exists('server', $finalHeader) && stripos($finalHeader['server'], 'cloudflare') !== false) {
|
||||
throw new CloudflareChallengeException($errorCode);
|
||||
}
|
||||
|
||||
if ($curlError || $curlErrno) {
|
||||
throw new GetContentsException('cURL error: ' . $curlError . ' (' . $curlErrno . ')');
|
||||
}
|
||||
|
||||
throw new UnexpectedResponseException($retVal['content'], $retVal['header'], $errorCode);
|
||||
}
|
||||
if ($attempts > $config['retries']) {
|
||||
// Finally give up
|
||||
throw new HttpException(sprintf('%s (%s)', curl_error($ch), curl_errno($ch)));
|
||||
}
|
||||
}
|
||||
|
||||
return ($returnHeader === true) ? $retVal : $retVal['content'];
|
||||
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
curl_close($ch);
|
||||
return [
|
||||
'code' => $statusCode,
|
||||
'headers' => $responseHeaders,
|
||||
'body' => $data,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -277,7 +178,11 @@ function getSimpleHTMLDOM($url,
|
|||
$defaultBRText = DEFAULT_BR_TEXT,
|
||||
$defaultSpanText = DEFAULT_SPAN_TEXT){
|
||||
|
||||
$content = getContents($url, $header, $opts);
|
||||
$content = getContents(
|
||||
$url,
|
||||
$header ?? [],
|
||||
$opts ?? []
|
||||
);
|
||||
return str_get_html($content,
|
||||
$lowercase,
|
||||
$forceTagsClosed,
|
||||
|
@ -362,44 +267,6 @@ function getSimpleHTMLDOMCached($url,
|
|||
$defaultSpanText);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the cURL response header into an associative array
|
||||
*
|
||||
* Based on https://stackoverflow.com/a/18682872
|
||||
*
|
||||
* @param string $header The cURL response header.
|
||||
* @return array An associative array of response headers.
|
||||
*/
|
||||
function parseResponseHeader($header) {
|
||||
|
||||
$headers = array();
|
||||
$requests = explode("\r\n\r\n", trim($header));
|
||||
|
||||
foreach ($requests as $request) {
|
||||
|
||||
$header = array();
|
||||
|
||||
foreach (explode("\r\n", $request) as $i => $line) {
|
||||
|
||||
if($i === 0) {
|
||||
$header['http_code'] = $line;
|
||||
} else {
|
||||
|
||||
list ($key, $value) = explode(':', $line);
|
||||
$header[$key] = trim($value);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
$headers[] = $header;
|
||||
|
||||
}
|
||||
|
||||
return $headers;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the MIME type from a URL/Path file extension.
|
||||
*
|
||||
|
|
Loading…
Reference in a new issue