diff --git a/README.md b/README.md index e027d912..d6d1046c 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ PHP ini config: ```ini ; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini -max_execution_time = 20 +max_execution_time = 15 memory_limit = 64M ``` diff --git a/config.default.ini.php b/config.default.ini.php index 21727c5e..ee1e54c9 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -48,7 +48,7 @@ enable_maintenance_mode = false [http] ; Operation timeout in seconds -timeout = 30 +timeout = 15 ; Operation retry count in case of curl error retries = 2 diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 4781ebc1..a82f8e5a 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -16,7 +16,7 @@ final class BridgeCard $bridge = $bridgeFactory->create($bridgeClassName); - $isHttps = strpos($bridge->getURI(), 'https') === 0; + $isHttps = str_starts_with($bridge->getURI(), 'https'); $uri = $bridge->getURI(); $name = $bridge->getName(); @@ -113,8 +113,7 @@ EOD; } if (!$isHttps) { - $form .= '
Warning : -This bridge is not fetching its content through a secure connection
'; + $form .= '
Warning: This bridge is not fetching its content through a secure connection
'; } return $form; diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index 056578e9..c0d7e878 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -41,7 +41,7 @@ abstract class FeedExpander extends BridgeAbstract } /** - * This method is overidden by bridges + * This method is overridden by bridges * * @return array */ diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 2d982de1..510bcb32 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -7,9 +7,9 @@ declare(strict_types=1); * * Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0. * - * Produce arrays meant to be used inside rss-bridge. + * Produces arrays meant to be used inside rss-bridge. * - * The item structure is tweaked so that works with FeedItem + * The item structure is tweaked so that it works with FeedItem */ final class FeedParser { diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index e30bb5eb..2206f79a 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ -518,7 +518,10 @@ abstract class XPathAbstract extends BridgeAbstract if (strlen($value) === 0) { return ''; } - if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { + if ( + strpos($value, 'http://') === 0 + || strpos($value, 'https://') === 0 + ) { return $value; } diff --git a/lib/contents.php b/lib/contents.php index 9998a3f1..43db8c03 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -24,6 +24,32 @@ function getContents( $headerValue = trim(implode(':', array_slice($parts, 1))); $httpHeadersNormalized[$headerName] = $headerValue; } + + $requestBodyHash = null; + if (isset($curlOptions[CURLOPT_POSTFIELDS])) { + $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false)); + } + $cacheKey = implode('_', ['server', $url, $requestBodyHash]); + + /** @var Response $cachedResponse */ + $cachedResponse = $cache->get($cacheKey); + if ($cachedResponse) { + $lastModified = $cachedResponse->getHeader('last-modified'); + if ($lastModified) { + try { + // Some 
servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime + $lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified); + $config['if_not_modified_since'] = $lastModified->getTimestamp(); + } catch (Exception $e) { + // Failed to parse last-modified + } + } + $etag = $cachedResponse->getHeader('etag'); + if ($etag) { + $httpHeadersNormalized['if-none-match'] = $etag; + } + } + // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 $defaultHttpHeaders = [ 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', @@ -35,6 +61,7 @@ function getContents( 'Sec-Fetch-User' => '?1', 'TE' => 'trailers', ]; + $config = [ 'useragent' => Configuration::getConfig('http', 'useragent'), 'timeout' => Configuration::getConfig('http', 'timeout'), @@ -53,28 +80,6 @@ function getContents( $config['proxy'] = Configuration::getConfig('proxy', 'url'); } - $requestBodyHash = null; - if (isset($curlOptions[CURLOPT_POSTFIELDS])) { - $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false)); - } - $cacheKey = implode('_', ['server', $url, $requestBodyHash]); - - /** @var Response $cachedResponse */ - $cachedResponse = $cache->get($cacheKey); - if ($cachedResponse) { - $cachedLastModified = $cachedResponse->getHeader('last-modified'); - if ($cachedLastModified) { - try { - // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime - $cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . 
$cachedLastModified); - $config['if_not_modified_since'] = $cachedLastModified->getTimestamp(); - } catch (Exception $dateTimeParseFailue) { - // Ignore invalid 'Last-Modified' HTTP header value - } - } - // todo: We should also check for Etag - } - $response = $httpClient->request($url, $config); switch ($response->getCode()) { diff --git a/lib/http.php b/lib/http.php index 405b01c6..90b65a6e 100644 --- a/lib/http.php +++ b/lib/http.php @@ -258,6 +258,10 @@ final class Response } /** + * An HTTP response may have multiple headers with the same name. + * + * By default, this method returns only the last header. + * * @return string[]|string|null */ public function getHeader(string $name, bool $all = false)