feat: add retry logic to the http client (#2692)

* refactor: extract http client

* feat: add retry logic to http client
This commit is contained in:
Dag 2022-05-08 03:58:57 +02:00 committed by GitHub
parent 0c7a7f320f
commit 5d77d14f9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 139 additions and 317 deletions

View file

@ -610,8 +610,8 @@ EOD;
try {
$result = getContents($uri, $this->authHeaders, array(), true);
} catch (UnexpectedResponseException $e) {
switch ($e->getResponseCode()) {
} catch (HttpException $e) {
switch ($e->getCode()) {
case 401:
case 403:
if ($retries) {
@ -621,8 +621,8 @@ EOD;
continue 2;
}
default:
$code = $e->getResponseCode();
$data = $e->getResponseBody();
$code = $e->getCode();
$data = $e->getMessage();
returnServerError(<<<EOD
Failed to make api call: $api
HTTP Status: $code

View file

@ -1,36 +1,26 @@
<?php
/* Generate the "Contributors" list for README.md automatically utilizing the GitHub API */
require __DIR__ . '/../../lib/rssbridge.php';
$url = 'https://api.github.com/repos/rss-bridge/rss-bridge/contributors';
$contributors = array();
$next = true;
while($next) { /* Collect all contributors */
$c = curl_init();
curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($c, CURLOPT_HTTPHEADER, array(
$headers = [
'Accept: application/json',
'Content-Type: application/json',
'User-Agent: RSS-Bridge'
));
curl_setopt($c, CURLOPT_URL, $url);
curl_setopt($c, CURLOPT_HEADER, true);
];
$result = _http_request($url, ['headers' => $headers]);
$data = curl_exec($c);
$headerSize = curl_getinfo($c, CURLINFO_HEADER_SIZE);
$header = substr($data, 0, $headerSize);
$headers = parseResponseHeader($header);
curl_close($c);
foreach(json_decode(substr($data, $headerSize)) as $contributor)
foreach(json_decode($result['body']) as $contributor)
$contributors[] = $contributor;
// Extract links to "next", "last", etc...
$links = explode(',', $headers[0]['link']);
$links = explode(',', $result['headers']['link'][0]);
$next = false;
// Check if there is a link with 'rel="next"'
@ -57,38 +47,3 @@ usort($contributors, function($a, $b){
foreach($contributors as $contributor) {
echo " * [{$contributor->login}]({$contributor->html_url})\n";
}
/**
* Parses the provided response header into an associative array
*
* Based on https://stackoverflow.com/a/18682872
*/
function parseResponseHeader($header) {
$headers = array();
$requests = explode("\r\n\r\n", trim($header));
foreach ($requests as $request) {
$header = array();
foreach (explode("\r\n", $request) as $i => $line) {
if($i === 0) {
$header['http_code'] = $line;
} else {
list ($key, $value) = explode(': ', $line);
$header[$key] = $value;
}
}
$headers[] = $header;
}
return $headers;
}

View file

@ -1,244 +1,145 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
final class HttpException extends \Exception {}
/**
* Exception class to handle all errors, when executing getContents
*/
class GetContentsException extends \Exception {
public function __construct($details, $code = 0, Throwable $previous = null) {
$message = trim($this->getMessageHeading() . "\n$details");
$lastError = error_get_last();
if($lastError !== null)
$message .= "\nLast PHP Error: " . $lastError['message'];
parent::__construct($message, $code, $previous);
}
protected function getMessageHeading() {
return 'Could not get contents';
}
}
/**
* Exception class to handle HTTP responses with Cloudflare challenges
**/
class CloudflareChallengeException extends \Exception {
public function __construct($code = 0, Throwable $previous = null) {
$message = <<<EOD
The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge!
If this error persists longer than a week, please consider opening an issue on GitHub!
EOD;
parent::__construct($message, $code, $previous);
}
}
/**
* Exception class to handle non-20x HTTP responses
**/
class UnexpectedResponseException extends \GetContentsException {
private $responseCode;
private $responseHeaders;
private $responseBody;
protected function getMessageHeading() {
return 'Unexpected response from upstream';
}
public function __construct($responseBody, $responseHeaders, $responseCode = 500, Throwable $previous = null) {
$this->responseCode = $responseCode;
$this->responseHeaders = $responseHeaders;
$this->responseBody = $responseBody;
parent::__construct('', $responseCode, $previous);
}
public function getResponseCode() {
return $this->responseCode;
}
public function getResponseHeaders() {
return $this->responseHeaders;
}
public function getResponseBody() {
return $this->responseBody;
}
}
/**
* Gets contents from the Internet.
*
* **Content caching** (disabled in debug mode)
*
* A copy of the received content is stored in a local cache folder `server/` at
* {@see PATH_CACHE}. The `If-Modified-Since` header is added to the request, if
* the provided URL has been cached before.
*
* When the server responds with `304 Not Modified`, the cached data is returned.
* This will improve response times and reduce bandwidth for servers that support
* the `If-Modified-Since` header.
*
* Cached files are forcefully removed after 24 hours.
*
* @link https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
* If-Modified-Since
*
* @param string $url The URL.
* @param array $header (optional) A list of cURL header.
* For more information follow the links below.
* * https://php.net/manual/en/function.curl-setopt.php
* * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html
* @param array $opts (optional) A list of cURL options as associative array in
* the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX`
* option and `$value` the corresponding value.
* @param bool $returnHeader Returns an array of two elements 'header' and
* 'content' if enabled.
*
* For more information see http://php.net/manual/en/function.curl-setopt.php
* @return string|array The contents.
*/
function getContents($url, $header = array(), $opts = array(), $returnHeader = false){
Debug::log('Reading contents from "' . $url . '"');
// Initialize cache
$cacheFac = new CacheFactory();
$cacheFac->setWorkingDir(PATH_LIB_CACHES);
$cache = $cacheFac->create(Configuration::getConfig('cache', 'type'));
function getContents(
string $url,
array $httpHeaders = [],
array $curlOptions = [],
bool $returnHeader = false
) {
$cacheFactory = new CacheFactory();
$cacheFactory->setWorkingDir(PATH_LIB_CACHES);
$cache = $cacheFactory->create(Configuration::getConfig('cache', 'type'));
$cache->setScope('server');
$cache->purgeCache(86400); // 24 hours (forced)
$cache->setKey([$url]);
$params = array($url);
$cache->setKey($params);
$config = [
'headers' => $httpHeaders,
'curl_options' => $curlOptions,
];
if (defined('PROXY_URL') && !defined('NOPROXY')) {
$config['proxy'] = PROXY_URL;
}
if(!Debug::isEnabled() && $cache->getTime()) {
$config['if_not_modified_since'] = $cache->getTime();
}
$retVal = array(
'header' => '',
'content' => '',
);
$result = _http_request($url, $config);
$response = [
'header' => $result['headers'],
'content' => $result['body'],
];
switch($result['code']) {
case 200:
case 201:
case 202:
if(isset($result['headers']['cache-control'])) {
$cachecontrol = $result['headers']['cache-control'];
$lastValue = array_pop($cachecontrol);
$directives = explode(',', $lastValue);
$directives = array_map('trim', $directives);
if(in_array('no-cache', $directives) || in_array('no-store', $directives)) {
break;
}
}
$cache->saveData($result['body']);
break;
case 304: // Not Modified
$response['content'] = $cache->loadData();
break;
default:
throw new HttpException('', $result['code']);
}
if ($returnHeader === true) {
return $response;
}
return $response['content'];
}
/**
* Private function used internally
*
* Fetch content from url
*
* @throws HttpException
*/
function _http_request(string $url, array $config = []): array
{
$defaults = [
'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'),
'headers' => [],
'proxy' => null,
'curl_options' => [],
'if_not_modified_since' => null,
'retries' => 3,
];
$config = array_merge($defaults, $config);
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
if(is_array($header) && count($header) !== 0) {
Debug::log('Setting headers: ' . json_encode($header));
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
}
curl_setopt($ch, CURLOPT_USERAGENT, Configuration::getConfig('http', 'useragent'));
curl_setopt($ch, CURLOPT_TIMEOUT, Configuration::getConfig('http', 'timeout'));
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_HTTPHEADER, $config['headers']);
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
if(is_array($opts) && count($opts) !== 0) {
Debug::log('Setting options: ' . json_encode($opts));
foreach($opts as $key => $value) {
if($config['proxy']) {
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
}
foreach($config['curl_options'] as $key => $value) {
curl_setopt($ch, $key, $value);
}
}
if(defined('PROXY_URL') && !defined('NOPROXY')) {
Debug::log('Setting proxy url: ' . PROXY_URL);
curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
}
// We always want the response header as part of the data!
curl_setopt($ch, CURLOPT_HEADER, true);
// Build "If-Modified-Since" header
if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist
Debug::log('Adding If-Modified-Since');
curl_setopt($ch, CURLOPT_TIMEVALUE, $time);
if ($config['if_not_modified_since']) {
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
// Enables logging for the outgoing header
curl_setopt($ch, CURLINFO_HEADER_OUT, true);
$responseHeaders = [];
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders) {
$len = strlen($rawHeader);
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader) || $rawHeader === "\r\n") {
return $len;
}
$header = explode(':', $rawHeader);
if (count($header) === 1) {
return $len;
}
$name = mb_strtolower(trim($header[0]));
$value = trim(implode(':', array_slice($header, 1)));
if (!isset($responseHeaders[$name])) {
$responseHeaders[$name] = [];
}
$responseHeaders[$name][] = $value;
return $len;
});
$attempts = 0;
while(true) {
$attempts++;
$data = curl_exec($ch);
$errorCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$curlError = curl_error($ch);
$curlErrno = curl_errno($ch);
$curlInfo = curl_getinfo($ch);
Debug::log('Outgoing header: ' . json_encode($curlInfo));
if($data === false)
Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$header = substr($data, 0, $headerSize);
$retVal['header'] = $header;
Debug::log('Response header: ' . $header);
$headers = parseResponseHeader($header);
$finalHeader = end($headers);
if ($data !== false) {
// The network call was successful, so break out of the loop
break;
}
if ($attempts > $config['retries']) {
// Finally give up
throw new HttpException(sprintf('%s (%s)', curl_error($ch), curl_errno($ch)));
}
}
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
$finalHeader = array_change_key_case($finalHeader, CASE_LOWER);
switch($errorCode) {
case 200: // Contents OK
case 201: // Contents Created
case 202: // Contents Accepted
Debug::log('New contents received');
$data = substr($data, $headerSize);
// Disable caching if the server responds with "Cache-Control: no-cache"
// or "Cache-Control: no-store"
if(array_key_exists('cache-control', $finalHeader)) {
Debug::log('Server responded with "Cache-Control" header');
$directives = explode(',', $finalHeader['cache-control']);
$directives = array_map('trim', $directives);
if(in_array('no-cache', $directives)
|| in_array('no-store', $directives)) { // Skip caching
Debug::log('Skip server side caching');
$retVal['content'] = $data;
break;
}
}
Debug::log('Store response to cache');
$cache->saveData($data);
$retVal['content'] = $data;
break;
case 304: // Not modified, use cached data
Debug::log('Contents not modified on host, returning cached data');
$retVal['content'] = $cache->loadData();
break;
default:
if(array_key_exists('server', $finalHeader) && stripos($finalHeader['server'], 'cloudflare') !== false) {
throw new CloudflareChallengeException($errorCode);
}
if ($curlError || $curlErrno) {
throw new GetContentsException('cURL error: ' . $curlError . ' (' . $curlErrno . ')');
}
throw new UnexpectedResponseException($retVal['content'], $retVal['header'], $errorCode);
}
return ($returnHeader === true) ? $retVal : $retVal['content'];
return [
'code' => $statusCode,
'headers' => $responseHeaders,
'body' => $data,
];
}
/**
@ -277,7 +178,11 @@ function getSimpleHTMLDOM($url,
$defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT){
$content = getContents($url, $header, $opts);
$content = getContents(
$url,
$header ?? [],
$opts ?? []
);
return str_get_html($content,
$lowercase,
$forceTagsClosed,
@ -362,44 +267,6 @@ function getSimpleHTMLDOMCached($url,
$defaultSpanText);
}
/**
* Parses the cURL response header into an associative array
*
* Based on https://stackoverflow.com/a/18682872
*
* @param string $header The cURL response header.
* @return array An associative array of response headers.
*/
function parseResponseHeader($header) {
$headers = array();
$requests = explode("\r\n\r\n", trim($header));
foreach ($requests as $request) {
$header = array();
foreach (explode("\r\n", $request) as $i => $line) {
if($i === 0) {
$header['http_code'] = $line;
} else {
list ($key, $value) = explode(':', $line);
$header[$key] = trim($value);
}
}
$headers[] = $header;
}
return $headers;
}
/**
* Determines the MIME type from a URL/Path file extension.
*