feat: add retry logic to the http client (#2692)

* refactor: extract http client

* feat: add retry logic to http client
This commit is contained in:
Dag 2022-05-08 03:58:57 +02:00 committed by GitHub
parent 0c7a7f320f
commit 5d77d14f9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 139 additions and 317 deletions

View file

@ -610,8 +610,8 @@ EOD;
try { try {
$result = getContents($uri, $this->authHeaders, array(), true); $result = getContents($uri, $this->authHeaders, array(), true);
} catch (UnexpectedResponseException $e) { } catch (HttpException $e) {
switch ($e->getResponseCode()) { switch ($e->getCode()) {
case 401: case 401:
case 403: case 403:
if ($retries) { if ($retries) {
@ -621,8 +621,8 @@ EOD;
continue 2; continue 2;
} }
default: default:
$code = $e->getResponseCode(); $code = $e->getCode();
$data = $e->getResponseBody(); $data = $e->getMessage();
returnServerError(<<<EOD returnServerError(<<<EOD
Failed to make api call: $api Failed to make api call: $api
HTTP Status: $code HTTP Status: $code

View file

@ -1,36 +1,26 @@
<?php <?php
/* Generate the "Contributors" list for README.md automatically utilizing the GitHub API */ /* Generate the "Contributors" list for README.md automatically utilizing the GitHub API */
require __DIR__ . '/../../lib/rssbridge.php';
$url = 'https://api.github.com/repos/rss-bridge/rss-bridge/contributors'; $url = 'https://api.github.com/repos/rss-bridge/rss-bridge/contributors';
$contributors = array(); $contributors = array();
$next = true; $next = true;
while($next) { /* Collect all contributors */ while($next) { /* Collect all contributors */
$c = curl_init(); $headers = [
'Accept: application/json',
'Content-Type: application/json',
'User-Agent: RSS-Bridge'
];
$result = _http_request($url, ['headers' => $headers]);
curl_setopt($c, CURLOPT_RETURNTRANSFER, 1); foreach(json_decode($result['body']) as $contributor)
curl_setopt($c, CURLOPT_HTTPHEADER, array(
'Accept: application/json',
'Content-Type: application/json',
'User-Agent: RSS-Bridge'
));
curl_setopt($c, CURLOPT_URL, $url);
curl_setopt($c, CURLOPT_HEADER, true);
$data = curl_exec($c);
$headerSize = curl_getinfo($c, CURLINFO_HEADER_SIZE);
$header = substr($data, 0, $headerSize);
$headers = parseResponseHeader($header);
curl_close($c);
foreach(json_decode(substr($data, $headerSize)) as $contributor)
$contributors[] = $contributor; $contributors[] = $contributor;
// Extract links to "next", "last", etc... // Extract links to "next", "last", etc...
$links = explode(',', $headers[0]['link']); $links = explode(',', $result['headers']['link'][0]);
$next = false; $next = false;
// Check if there is a link with 'rel="next"' // Check if there is a link with 'rel="next"'
@ -57,38 +47,3 @@ usort($contributors, function($a, $b){
foreach($contributors as $contributor) { foreach($contributors as $contributor) {
echo " * [{$contributor->login}]({$contributor->html_url})\n"; echo " * [{$contributor->login}]({$contributor->html_url})\n";
} }
/**
 * Parses the provided raw response header text into a list of associative arrays.
 *
 * The input may contain several header sections separated by an empty line.
 * Every section becomes one array: its 'http_code' entry holds the first line
 * of the section (the status line) and every further entry maps a header name,
 * exactly as received, to its value.
 *
 * Based on https://stackoverflow.com/a/18682872
 *
 * @param string $header Raw header text using CRLF line endings.
 * @return array List of header sections, in the order they appear in $header.
 */
function parseResponseHeader($header) {
    $sections = [];

    foreach (explode("\r\n\r\n", trim($header)) as $rawSection) {
        $fields = [];

        foreach (explode("\r\n", $rawSection) as $index => $line) {
            if ($index === 0) {
                // The first line of a section is the status line, not a header field.
                $fields['http_code'] = $line;
                continue;
            }

            // Split into at most two parts so that a value which itself contains
            // a colon (URLs, timestamps, ...) is kept in one piece.
            $parts = explode(':', $line, 2);
            if (count($parts) !== 2) {
                // Not a "name: value" line; skip it instead of raising an
                // undefined-offset notice and storing a null value.
                continue;
            }

            $fields[trim($parts[0])] = trim($parts[1]);
        }

        $sections[] = $fields;
    }

    return $sections;
}

View file

@ -1,244 +1,145 @@
<?php <?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
 * Exception raised by the HTTP helpers in this file.
 *
 * getContents() throws it with an empty message and the HTTP status as the
 * exception code when the status is not handled; _http_request() throws it
 * with the cURL error text once all attempts have failed.
 */
final class HttpException extends \Exception
{
}
/** function getContents(
* Exception class to handle all errors, when executing getContents string $url,
*/ array $httpHeaders = [],
class GetContentsException extends \Exception { array $curlOptions = [],
public function __construct($details, $code = 0, Throwable $previous = null) { bool $returnHeader = false
$message = trim($this->getMessageHeading() . "\n$details"); ) {
$cacheFactory = new CacheFactory();
$lastError = error_get_last(); $cacheFactory->setWorkingDir(PATH_LIB_CACHES);
if($lastError !== null) $cache = $cacheFactory->create(Configuration::getConfig('cache', 'type'));
$message .= "\nLast PHP Error: " . $lastError['message'];
parent::__construct($message, $code, $previous);
}
protected function getMessageHeading() {
return 'Could not get contents';
}
}
/**
 * Exception signalling that the upstream server answered with a Cloudflare
 * challenge page, which RSS-Bridge cannot handle.
 *
 * The message is fixed; only the code and the previous exception are
 * supplied by the caller.
 */
class CloudflareChallengeException extends \Exception
{
    /**
     * @param int $code Exception code passed on to \Exception.
     * @param Throwable|null $previous Previous exception, if any.
     */
    public function __construct($code = 0, Throwable $previous = null)
    {
        parent::__construct(
            'The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge!'
            . "\n"
            . 'If this error persists longer than a week, please consider opening an issue on GitHub!',
            $code,
            $previous
        );
    }
}
/**
 * Exception describing an upstream response whose status was not accepted.
 *
 * Besides the usual exception data it keeps the response body, the response
 * headers and the status code, each available through its own getter.
 */
class UnexpectedResponseException extends \GetContentsException
{
    /** Status code of the response. */
    private $statusCode;

    /** Headers of the response, stored as given to the constructor. */
    private $headers;

    /** Body of the response, stored as given to the constructor. */
    private $body;

    /**
     * @param mixed $responseBody Body of the rejected response.
     * @param mixed $responseHeaders Headers of the rejected response.
     * @param int $responseCode Status code; also used as the exception code.
     * @param Throwable|null $previous Previous exception, if any.
     */
    public function __construct($responseBody, $responseHeaders, $responseCode = 500, Throwable $previous = null)
    {
        $this->statusCode = $responseCode;
        $this->headers = $responseHeaders;
        $this->body = $responseBody;

        // No extra details are passed on; the parent receives an empty string.
        parent::__construct('', $responseCode, $previous);
    }

    /** Heading used by the parent class when it builds the message. */
    protected function getMessageHeading()
    {
        return 'Unexpected response from upstream';
    }

    /** @return int The status code given to the constructor. */
    public function getResponseCode()
    {
        return $this->statusCode;
    }

    /** @return mixed The headers given to the constructor. */
    public function getResponseHeaders()
    {
        return $this->headers;
    }

    /** @return mixed The body given to the constructor. */
    public function getResponseBody()
    {
        return $this->body;
    }
}
/**
* Gets contents from the Internet.
*
* **Content caching** (disabled in debug mode)
*
* A copy of the received content is stored in a local cache folder `server/` at
* {@see PATH_CACHE}. The `If-Modified-Since` header is added to the request, if
* the provided URL has been cached before.
*
* When the server responds with `304 Not Modified`, the cached data is returned.
* This will improve response times and reduce bandwidth for servers that support
* the `If-Modified-Since` header.
*
* Cached files are forcefully removed after 24 hours.
*
* @link https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
* If-Modified-Since
*
* @param string $url The URL.
* @param array $header (optional) A list of cURL headers.
* For more information follow the links below.
* * https://php.net/manual/en/function.curl-setopt.php
* * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html
* @param array $opts (optional) A list of cURL options as associative array in
* the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX`
* option and `$value` the corresponding value.
* @param bool $returnHeader Returns an array of two elements 'header' and
* 'content' if enabled.
*
* For more information see http://php.net/manual/en/function.curl-setopt.php
* @return string|array The contents.
*/
function getContents($url, $header = array(), $opts = array(), $returnHeader = false){
Debug::log('Reading contents from "' . $url . '"');
// Initialize cache
$cacheFac = new CacheFactory();
$cacheFac->setWorkingDir(PATH_LIB_CACHES);
$cache = $cacheFac->create(Configuration::getConfig('cache', 'type'));
$cache->setScope('server'); $cache->setScope('server');
$cache->purgeCache(86400); // 24 hours (forced) $cache->purgeCache(86400); // 24 hours (forced)
$cache->setKey([$url]);
$params = array($url); $config = [
$cache->setKey($params); 'headers' => $httpHeaders,
'curl_options' => $curlOptions,
];
if (defined('PROXY_URL') && !defined('NOPROXY')) {
$config['proxy'] = PROXY_URL;
}
if(!Debug::isEnabled() && $cache->getTime()) {
$config['if_not_modified_since'] = $cache->getTime();
}
$retVal = array( $result = _http_request($url, $config);
'header' => '', $response = [
'content' => '', 'header' => $result['headers'],
); 'content' => $result['body'],
];
switch($result['code']) {
case 200:
case 201:
case 202:
if(isset($result['headers']['cache-control'])) {
$cachecontrol = $result['headers']['cache-control'];
$lastValue = array_pop($cachecontrol);
$directives = explode(',', $lastValue);
$directives = array_map('trim', $directives);
if(in_array('no-cache', $directives) || in_array('no-store', $directives)) {
break;
}
}
$cache->saveData($result['body']);
break;
case 304: // Not Modified
$response['content'] = $cache->loadData();
break;
default:
throw new HttpException('', $result['code']);
}
if ($returnHeader === true) {
return $response;
}
return $response['content'];
}
/**
* Private function used internally
*
* Fetch content from url
*
* @throws HttpException
*/
function _http_request(string $url, array $config = []): array
{
$defaults = [
'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'),
'headers' => [],
'proxy' => null,
'curl_options' => [],
'if_not_modified_since' => null,
'retries' => 3,
];
$config = array_merge($defaults, $config);
$ch = curl_init($url); $ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
if(is_array($header) && count($header) !== 0) { curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_HTTPHEADER, $config['headers']);
Debug::log('Setting headers: ' . json_encode($header)); curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header); curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
}
curl_setopt($ch, CURLOPT_USERAGENT, Configuration::getConfig('http', 'useragent'));
curl_setopt($ch, CURLOPT_TIMEOUT, Configuration::getConfig('http', 'timeout'));
curl_setopt($ch, CURLOPT_ENCODING, ''); curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
if($config['proxy']) {
if(is_array($opts) && count($opts) !== 0) { curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
Debug::log('Setting options: ' . json_encode($opts));
foreach($opts as $key => $value) {
curl_setopt($ch, $key, $value);
}
} }
foreach($config['curl_options'] as $key => $value) {
if(defined('PROXY_URL') && !defined('NOPROXY')) { curl_setopt($ch, $key, $value);
Debug::log('Setting proxy url: ' . PROXY_URL);
curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
} }
if ($config['if_not_modified_since']) {
// We always want the response header as part of the data! curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
curl_setopt($ch, CURLOPT_HEADER, true);
// Build "If-Modified-Since" header
if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist
Debug::log('Adding If-Modified-Since');
curl_setopt($ch, CURLOPT_TIMEVALUE, $time);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
} }
// Enables logging for the outgoing header $responseHeaders = [];
curl_setopt($ch, CURLINFO_HEADER_OUT, true); curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders) {
$len = strlen($rawHeader);
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader) || $rawHeader === "\r\n") {
return $len;
}
$header = explode(':', $rawHeader);
if (count($header) === 1) {
return $len;
}
$name = mb_strtolower(trim($header[0]));
$value = trim(implode(':', array_slice($header, 1)));
if (!isset($responseHeaders[$name])) {
$responseHeaders[$name] = [];
}
$responseHeaders[$name][] = $value;
return $len;
});
$data = curl_exec($ch); $attempts = 0;
$errorCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); while(true) {
$attempts++;
$curlError = curl_error($ch); $data = curl_exec($ch);
$curlErrno = curl_errno($ch); if ($data !== false) {
$curlInfo = curl_getinfo($ch); // The network call was successful, so break out of the loop
Debug::log('Outgoing header: ' . json_encode($curlInfo));
if($data === false)
Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$header = substr($data, 0, $headerSize);
$retVal['header'] = $header;
Debug::log('Response header: ' . $header);
$headers = parseResponseHeader($header);
$finalHeader = end($headers);
curl_close($ch);
$finalHeader = array_change_key_case($finalHeader, CASE_LOWER);
switch($errorCode) {
case 200: // Contents OK
case 201: // Contents Created
case 202: // Contents Accepted
Debug::log('New contents received');
$data = substr($data, $headerSize);
// Disable caching if the server responds with "Cache-Control: no-cache"
// or "Cache-Control: no-store"
if(array_key_exists('cache-control', $finalHeader)) {
Debug::log('Server responded with "Cache-Control" header');
$directives = explode(',', $finalHeader['cache-control']);
$directives = array_map('trim', $directives);
if(in_array('no-cache', $directives)
|| in_array('no-store', $directives)) { // Skip caching
Debug::log('Skip server side caching');
$retVal['content'] = $data;
break;
}
}
Debug::log('Store response to cache');
$cache->saveData($data);
$retVal['content'] = $data;
break; break;
case 304: // Not modified, use cached data }
Debug::log('Contents not modified on host, returning cached data'); if ($attempts > $config['retries']) {
$retVal['content'] = $cache->loadData(); // Finally give up
break; throw new HttpException(sprintf('%s (%s)', curl_error($ch), curl_errno($ch)));
default: }
if(array_key_exists('server', $finalHeader) && stripos($finalHeader['server'], 'cloudflare') !== false) {
throw new CloudflareChallengeException($errorCode);
}
if ($curlError || $curlErrno) {
throw new GetContentsException('cURL error: ' . $curlError . ' (' . $curlErrno . ')');
}
throw new UnexpectedResponseException($retVal['content'], $retVal['header'], $errorCode);
} }
return ($returnHeader === true) ? $retVal : $retVal['content']; $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
return [
'code' => $statusCode,
'headers' => $responseHeaders,
'body' => $data,
];
} }
/** /**
@ -277,7 +178,11 @@ function getSimpleHTMLDOM($url,
$defaultBRText = DEFAULT_BR_TEXT, $defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT){ $defaultSpanText = DEFAULT_SPAN_TEXT){
$content = getContents($url, $header, $opts); $content = getContents(
$url,
$header ?? [],
$opts ?? []
);
return str_get_html($content, return str_get_html($content,
$lowercase, $lowercase,
$forceTagsClosed, $forceTagsClosed,
@ -362,44 +267,6 @@ function getSimpleHTMLDOMCached($url,
$defaultSpanText); $defaultSpanText);
} }
/**
 * Parses the cURL response header into a list of associative arrays.
 *
 * The header text is split into sections at empty lines. For each section the
 * first line is stored under 'http_code'; every following "Name: value" line
 * is stored under its name (case preserved) with the value trimmed.
 *
 * Based on https://stackoverflow.com/a/18682872
 *
 * @param string $header The cURL response header (CRLF line endings).
 * @return array A list with one associative array of headers per section.
 */
function parseResponseHeader($header) {
    $sections = [];

    foreach (explode("\r\n\r\n", trim($header)) as $rawSection) {
        $fields = [];

        foreach (explode("\r\n", $rawSection) as $index => $line) {
            if ($index === 0) {
                // The status line has no "name: value" shape; keep it verbatim.
                $fields['http_code'] = $line;
                continue;
            }

            // Only split at the first colon: values such as
            // "Location: https://..." or dates contain further colons that
            // must stay part of the value.
            $parts = explode(':', $line, 2);
            if (count($parts) !== 2) {
                // A line without any colon is not a header field; skipping it
                // avoids an undefined-offset notice.
                continue;
            }

            $fields[trim($parts[0])] = trim($parts[1]);
        }

        $sections[] = $fields;
    }

    return $sections;
}
/** /**
* Determines the MIME type from a URL/Path file extension. * Determines the MIME type from a URL/Path file extension.
* *