feat: add etag support to getContents (#3893)

This commit is contained in:
Dag 2024-01-12 01:31:01 +01:00 committed by GitHub
parent d5175aebcc
commit 191e5b0493
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 42 additions and 31 deletions

View file

@ -163,7 +163,7 @@ PHP ini config:
```ini
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
max_execution_time = 20
max_execution_time = 15
memory_limit = 64M
```

View file

@ -48,7 +48,7 @@ enable_maintenance_mode = false
[http]
; Operation timeout in seconds
timeout = 30
timeout = 15
; Operation retry count in case of curl error
retries = 2

View file

@ -16,7 +16,7 @@ final class BridgeCard
$bridge = $bridgeFactory->create($bridgeClassName);
$isHttps = strpos($bridge->getURI(), 'https') === 0;
$isHttps = str_starts_with($bridge->getURI(), 'https');
$uri = $bridge->getURI();
$name = $bridge->getName();
@ -113,8 +113,7 @@ EOD;
}
if (!$isHttps) {
$form .= '<div class="secure-warning">Warning :
This bridge is not fetching its content through a secure connection</div>';
$form .= '<div class="secure-warning">Warning: This bridge is not fetching its content through a secure connection</div>';
}
return $form;

View file

@ -41,7 +41,7 @@ abstract class FeedExpander extends BridgeAbstract
}
/**
* This method is overidden by bridges
* This method is overridden by bridges
*
* @return array
*/

View file

@ -7,9 +7,9 @@ declare(strict_types=1);
*
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
*
* Produce arrays meant to be used inside rss-bridge.
* Produces arrays meant to be used inside rss-bridge.
*
* The item structure is tweaked so that works with FeedItem
* The item structure is tweaked so that it works with FeedItem
*/
final class FeedParser
{

View file

@ -518,7 +518,10 @@ abstract class XPathAbstract extends BridgeAbstract
if (strlen($value) === 0) {
return '';
}
if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) {
if (
strpos($value, 'http://') === 0
|| strpos($value, 'https://') === 0
) {
return $value;
}

View file

@ -24,6 +24,32 @@ function getContents(
$headerValue = trim(implode(':', array_slice($parts, 1)));
$httpHeadersNormalized[$headerName] = $headerValue;
}
$requestBodyHash = null;
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
}
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
$lastModified = $cachedResponse->getHeader('last-modified');
if ($lastModified) {
try {
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
$lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified);
$config['if_not_modified_since'] = $lastModified->getTimestamp();
} catch (Exception $e) {
// Failed to parse last-modified
}
}
$etag = $cachedResponse->getHeader('etag');
if ($etag) {
$httpHeadersNormalized['if-none-match'] = $etag;
}
}
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
$defaultHttpHeaders = [
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
@ -35,6 +61,7 @@ function getContents(
'Sec-Fetch-User' => '?1',
'TE' => 'trailers',
];
$config = [
'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'),
@ -53,28 +80,6 @@ function getContents(
$config['proxy'] = Configuration::getConfig('proxy', 'url');
}
$requestBodyHash = null;
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
}
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
$cachedLastModified = $cachedResponse->getHeader('last-modified');
if ($cachedLastModified) {
try {
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
$cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . $cachedLastModified);
$config['if_not_modified_since'] = $cachedLastModified->getTimestamp();
} catch (Exception $dateTimeParseFailue) {
// Ignore invalid 'Last-Modified' HTTP header value
}
}
// todo: We should also check for Etag
}
$response = $httpClient->request($url, $config);
switch ($response->getCode()) {

View file

@ -258,6 +258,10 @@ final class Response
}
/**
* HTTP response may have multiple headers with the same name.
*
* By default, this method returns only the last header.
*
* @return string[]|string|null
*/
public function getHeader(string $name, bool $all = false)