diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index f0b8a691..ccb840eb 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -610,8 +610,8 @@ EOD; try { $result = getContents($uri, $this->authHeaders, array(), true); - } catch (UnexpectedResponseException $e) { - switch ($e->getResponseCode()) { + } catch (HttpException $e) { + switch ($e->getCode()) { case 401: case 403: if ($retries) { @@ -621,8 +621,8 @@ EOD; continue 2; } default: - $code = $e->getResponseCode(); - $data = $e->getResponseBody(); + $code = $e->getCode(); + $data = $e->getMessage(); returnServerError(<< $headers]); - curl_setopt($c, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($c, CURLOPT_HTTPHEADER, array( - 'Accept: application/json', - 'Content-Type: application/json', - 'User-Agent: RSS-Bridge' - )); - curl_setopt($c, CURLOPT_URL, $url); - curl_setopt($c, CURLOPT_HEADER, true); - - $data = curl_exec($c); - - $headerSize = curl_getinfo($c, CURLINFO_HEADER_SIZE); - $header = substr($data, 0, $headerSize); - $headers = parseResponseHeader($header); - - curl_close($c); - - foreach(json_decode(substr($data, $headerSize)) as $contributor) + foreach(json_decode($result['body']) as $contributor) $contributors[] = $contributor; // Extract links to "next", "last", etc... - $links = explode(',', $headers[0]['link']); + $links = explode(',', $result['headers']['link'][0]); $next = false; // Check if there is a link with 'rel="next"' @@ -57,38 +47,3 @@ usort($contributors, function($a, $b){ foreach($contributors as $contributor) { echo " * [{$contributor->login}]({$contributor->html_url})\n"; } - -/** - * Parses the provided response header into an associative array - * - * Based on https://stackoverflow.com/a/18682872 - */ -function parseResponseHeader($header) { - - $headers = array(); - $requests = explode("\r\n\r\n", trim($header)); - - foreach ($requests as $request) { - - $header = array(); - - foreach (explode("\r\n", $request) as $i => $line) { - - if($i === 0) { - $header['http_code'] = $line; - } else { - - list ($key, $value) = explode(': ', $line); - $header[$key] = $value; - - } - - } - - $headers[] = $header; - - } - - return $headers; - -} diff --git a/lib/contents.php b/lib/contents.php index 4ceeb4cc..2910b47e 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -1,244 +1,145 @@ getMessageHeading() . "\n$details"); - - $lastError = error_get_last(); - if($lastError !== null) - $message .= "\nLast PHP Error: " . $lastError['message']; - - parent::__construct($message, $code, $previous); - } - - protected function getMessageHeading() { - return 'Could not get contents'; - } -} - -/** - * Exception class to handle HTTP responses with Cloudflare challenges - **/ -class CloudflareChallengeException extends \Exception { - public function __construct($code = 0, Throwable $previous = null) { - $message = <<responseCode = $responseCode; - $this->responseHeaders = $responseHeaders; - $this->responseBody = $responseBody; - - parent::__construct('', $responseCode, $previous); - } - - public function getResponseCode() { - return $this->responseCode; - } - - public function getResponseHeaders() { - return $this->responseHeaders; - } - - public function getResponseBody() { - return $this->responseBody; - } -} - -/** - * Gets contents from the Internet. - * - * **Content caching** (disabled in debug mode) - * - * A copy of the received content is stored in a local cache folder `server/` at - * {@see PATH_CACHE}. The `If-Modified-Since` header is added to the request, if - * the provided URL has been cached before. - * - * When the server responds with `304 Not Modified`, the cached data is returned. - * This will improve response times and reduce bandwidth for servers that support - * the `If-Modified-Since` header. - * - * Cached files are forcefully removed after 24 hours. - * - * @link https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since - * If-Modified-Since - * - * @param string $url The URL. - * @param array $header (optional) A list of cURL header. - * For more information follow the links below. - * * https://php.net/manual/en/function.curl-setopt.php - * * https://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html - * @param array $opts (optional) A list of cURL options as associative array in - * the format `$opts[$option] = $value;`, where `$option` is any `CURLOPT_XXX` - * option and `$value` the corresponding value. - * @param bool $returnHeader Returns an array of two elements 'header' and - * 'content' if enabled. - * - * For more information see http://php.net/manual/en/function.curl-setopt.php - * @return string|array The contents. - */ -function getContents($url, $header = array(), $opts = array(), $returnHeader = false){ - Debug::log('Reading contents from "' . $url . '"'); - - // Initialize cache - $cacheFac = new CacheFactory(); - $cacheFac->setWorkingDir(PATH_LIB_CACHES); - $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); +function getContents( + string $url, + array $httpHeaders = [], + array $curlOptions = [], + bool $returnHeader = false +) { + $cacheFactory = new CacheFactory(); + $cacheFactory->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFactory->create(Configuration::getConfig('cache', 'type')); $cache->setScope('server'); $cache->purgeCache(86400); // 24 hours (forced) + $cache->setKey([$url]); - $params = array($url); - $cache->setKey($params); + $config = [ + 'headers' => $httpHeaders, + 'curl_options' => $curlOptions, + ]; + if (defined('PROXY_URL') && !defined('NOPROXY')) { + $config['proxy'] = PROXY_URL; + } + if(!Debug::isEnabled() && $cache->getTime()) { + $config['if_not_modified_since'] = $cache->getTime(); + } - $retVal = array( - 'header' => '', - 'content' => '', - ); + $result = _http_request($url, $config); + $response = [ + 'header' => $result['headers'], + 'content' => $result['body'], + ]; + + switch($result['code']) { + case 200: + case 201: + case 202: + if(isset($result['headers']['cache-control'])) { + $cachecontrol = $result['headers']['cache-control']; + $lastValue = array_pop($cachecontrol); + $directives = explode(',', $lastValue); + $directives = array_map('trim', $directives); + if(in_array('no-cache', $directives) || in_array('no-store', $directives)) { + break; + } + } + $cache->saveData($result['body']); + break; + case 304: // Not Modified + $response['content'] = $cache->loadData(); + break; + default: + throw new HttpException('', $result['code']); + } + if ($returnHeader === true) { + return $response; + } + return $response['content']; +} + +/** + * Private function used internally + * + * Fetch content from url + * + * @throws HttpException + */ +function _http_request(string $url, array $config = []): array +{ + $defaults = [ + 'useragent' => Configuration::getConfig('http', 'useragent'), + 'timeout' => Configuration::getConfig('http', 'timeout'), + 'headers' => [], + 'proxy' => null, + 'curl_options' => [], + 'if_not_modified_since' => null, + 'retries' => 3, + ]; + $config = array_merge($defaults, $config); $ch = curl_init($url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - - if(is_array($header) && count($header) !== 0) { - - Debug::log('Setting headers: ' . json_encode($header)); - curl_setopt($ch, CURLOPT_HTTPHEADER, $header); - - } - - curl_setopt($ch, CURLOPT_USERAGENT, Configuration::getConfig('http', 'useragent')); - curl_setopt($ch, CURLOPT_TIMEOUT, Configuration::getConfig('http', 'timeout')); + curl_setopt($ch, CURLOPT_MAXREDIRS, 5); + curl_setopt($ch, CURLOPT_HEADER, false); + curl_setopt($ch, CURLOPT_HTTPHEADER, $config['headers']); + curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); + curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); curl_setopt($ch, CURLOPT_ENCODING, ''); curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); - - if(is_array($opts) && count($opts) !== 0) { - - Debug::log('Setting options: ' . json_encode($opts)); - - foreach($opts as $key => $value) { - curl_setopt($ch, $key, $value); - } + if($config['proxy']) { + curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); } - - if(defined('PROXY_URL') && !defined('NOPROXY')) { - - Debug::log('Setting proxy url: ' . PROXY_URL); - curl_setopt($ch, CURLOPT_PROXY, PROXY_URL); - + foreach($config['curl_options'] as $key => $value) { + curl_setopt($ch, $key, $value); } - - // We always want the response header as part of the data! - curl_setopt($ch, CURLOPT_HEADER, true); - - // Build "If-Modified-Since" header - if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist - Debug::log('Adding If-Modified-Since'); - curl_setopt($ch, CURLOPT_TIMEVALUE, $time); + if ($config['if_not_modified_since']) { + curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); } - // Enables logging for the outgoing header - curl_setopt($ch, CURLINFO_HEADER_OUT, true); + $responseHeaders = []; + curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders) { + $len = strlen($rawHeader); + if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader) || $rawHeader === "\r\n") { + return $len; + } + $header = explode(':', $rawHeader); + if (count($header) === 1) { + return $len; + } + $name = mb_strtolower(trim($header[0])); + $value = trim(implode(':', array_slice($header, 1))); + if (!isset($responseHeaders[$name])) { + $responseHeaders[$name] = []; + } + $responseHeaders[$name][] = $value; + return $len; + }); - $data = curl_exec($ch); - $errorCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - - $curlError = curl_error($ch); - $curlErrno = curl_errno($ch); - $curlInfo = curl_getinfo($ch); - - Debug::log('Outgoing header: ' . json_encode($curlInfo)); - - if($data === false) - Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')'); - - $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - $header = substr($data, 0, $headerSize); - $retVal['header'] = $header; - - Debug::log('Response header: ' . $header); - - $headers = parseResponseHeader($header); - $finalHeader = end($headers); - - curl_close($ch); - - $finalHeader = array_change_key_case($finalHeader, CASE_LOWER); - - switch($errorCode) { - case 200: // Contents OK - case 201: // Contents Created - case 202: // Contents Accepted - Debug::log('New contents received'); - $data = substr($data, $headerSize); - // Disable caching if the server responds with "Cache-Control: no-cache" - // or "Cache-Control: no-store" - - if(array_key_exists('cache-control', $finalHeader)) { - Debug::log('Server responded with "Cache-Control" header'); - $directives = explode(',', $finalHeader['cache-control']); - $directives = array_map('trim', $directives); - if(in_array('no-cache', $directives) - || in_array('no-store', $directives)) { // Skip caching - Debug::log('Skip server side caching'); - $retVal['content'] = $data; - break; - } - } - Debug::log('Store response to cache'); - $cache->saveData($data); - $retVal['content'] = $data; + $attempts = 0; + while(true) { + $attempts++; + $data = curl_exec($ch); + if ($data !== false) { + // The network call was successful, so break out of the loop break; - case 304: // Not modified, use cached data - Debug::log('Contents not modified on host, returning cached data'); - $retVal['content'] = $cache->loadData(); - break; - default: - if(array_key_exists('server', $finalHeader) && stripos($finalHeader['server'], 'cloudflare') !== false) { - throw new CloudflareChallengeException($errorCode); - } - - if ($curlError || $curlErrno) { - throw new GetContentsException('cURL error: ' . $curlError . ' (' . $curlErrno . ')'); - } - - throw new UnexpectedResponseException($retVal['content'], $retVal['header'], $errorCode); + } + if ($attempts > $config['retries']) { + // Finally give up + throw new HttpException(sprintf('%s (%s)', curl_error($ch), curl_errno($ch))); + } } - return ($returnHeader === true) ? $retVal : $retVal['content']; + $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + return [ + 'code' => $statusCode, + 'headers' => $responseHeaders, + 'body' => $data, + ]; } /** @@ -277,7 +178,11 @@ function getSimpleHTMLDOM($url, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT){ - $content = getContents($url, $header, $opts); + $content = getContents( + $url, + $header ?? [], + $opts ?? [] + ); return str_get_html($content, $lowercase, $forceTagsClosed, @@ -362,44 +267,6 @@ function getSimpleHTMLDOMCached($url, $defaultSpanText); } -/** - * Parses the cURL response header into an associative array - * - * Based on https://stackoverflow.com/a/18682872 - * - * @param string $header The cURL response header. - * @return array An associative array of response headers. - */ -function parseResponseHeader($header) { - - $headers = array(); - $requests = explode("\r\n\r\n", trim($header)); - - foreach ($requests as $request) { - - $header = array(); - - foreach (explode("\r\n", $request) as $i => $line) { - - if($i === 0) { - $header['http_code'] = $line; - } else { - - list ($key, $value) = explode(':', $line); - $header[$key] = trim($value); - - } - - } - - $headers[] = $header; - - } - - return $headers; - -} - /** * Determines the MIME type from a URL/Path file extension. *