mirror of
https://github.com/shlinkio/shlink.git
synced 2024-11-23 13:23:33 +03:00
Changed logic when resolving the title of a URL, to ensure only html content is tried to be downloaded, and only until the title tag has been parsed
This commit is contained in:
parent
eea76999b2
commit
18f656fed2
4 changed files with 72 additions and 6 deletions
17
CHANGELOG.md
17
CHANGELOG.md
|
@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.
|
|||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org).
|
||||
|
||||
## [Unreleased]
|
||||
### Added
|
||||
* *Nothing*
|
||||
|
||||
### Changed
|
||||
* *Nothing*
|
||||
|
||||
### Deprecated
|
||||
* *Nothing*
|
||||
|
||||
### Removed
|
||||
* *Nothing*
|
||||
|
||||
### Fixed
|
||||
* [#1439](https://github.com/shlinkio/shlink/issues/1439) Fixed crash when trying to auto-resolve titles for URLs which serve large binary files.
|
||||
|
||||
|
||||
## [3.1.0] - 2022-04-23
|
||||
### Added
|
||||
* [#1294](https://github.com/shlinkio/shlink/issues/1294) Allowed to provide a specific domain when importing URLs from YOURLS.
|
||||
|
|
|
@ -139,7 +139,7 @@
|
|||
"test:api": "bin/test/run-api-tests.sh",
|
||||
"test:api:ci": "GENERATE_COVERAGE=yes composer test:api",
|
||||
"infect:ci:base": "infection --threads=4 --log-verbosity=default --only-covered --only-covering-test-cases --skip-initial-tests",
|
||||
"infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=85",
|
||||
"infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=84",
|
||||
"infect:ci:db": "@infect:ci:base --coverage=build/coverage-db --min-msi=95 --configuration=infection-db.json",
|
||||
"infect:ci:api": "@infect:ci:base --coverage=build/coverage-api --min-msi=80 --configuration=infection-api.json",
|
||||
"infect:ci": "@parallel infect:ci:unit infect:ci:db infect:ci:api",
|
||||
|
|
|
@ -14,6 +14,9 @@ use Shlinkio\Shlink\Core\Options\UrlShortenerOptions;
|
|||
use Throwable;
|
||||
|
||||
use function preg_match;
|
||||
use function str_contains;
|
||||
use function str_starts_with;
|
||||
use function strtolower;
|
||||
use function trim;
|
||||
|
||||
use const Shlinkio\Shlink\TITLE_TAG_VALUE;
|
||||
|
@ -56,8 +59,18 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
|
|||
return null;
|
||||
}
|
||||
|
||||
$body = $response->getBody()->__toString();
|
||||
preg_match(TITLE_TAG_VALUE, $body, $matches);
|
||||
$contentType = strtolower($response->getHeaderLine('Content-Type'));
|
||||
if (! str_starts_with($contentType, 'text/html')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$collectedBody = '';
|
||||
$body = $response->getBody();
|
||||
// With streaming enabled, we can walk the body until the </title> tag is found, and then stop
|
||||
while (! str_contains($collectedBody, '</title>') && ! $body->eof()) {
|
||||
$collectedBody .= $body->read(1024);
|
||||
}
|
||||
preg_match(TITLE_TAG_VALUE, $collectedBody, $matches);
|
||||
return isset($matches[1]) ? trim($matches[1]) : null;
|
||||
}
|
||||
|
||||
|
@ -73,6 +86,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
|
|||
RequestOptions::IDN_CONVERSION => true,
|
||||
// Making the request with a browser's user agent makes the validation closer to a real user
|
||||
RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT],
|
||||
RequestOptions::STREAM => true, // This ensures large files are not fully downloaded if not needed
|
||||
]);
|
||||
} catch (GuzzleException $e) {
|
||||
throw InvalidUrlException::fromUrl($url, $e);
|
||||
|
|
|
@ -132,11 +132,46 @@ class UrlValidatorTest extends TestCase
|
|||
$request->shouldHaveBeenCalledOnce();
|
||||
}
|
||||
|
||||
/** @test */
|
||||
public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndReturnedContentTypeIsInvalid(): void
|
||||
{
|
||||
$request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn(
|
||||
new Response('php://memory', 200, ['Content-Type' => 'application/octet-stream']),
|
||||
);
|
||||
$this->options->autoResolveTitles = true;
|
||||
|
||||
$result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);
|
||||
|
||||
self::assertNull($result);
|
||||
$request->shouldHaveBeenCalledOnce();
|
||||
}
|
||||
|
||||
/** @test */
|
||||
public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndBodyDoesNotContainTitle(): void
|
||||
{
|
||||
$request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn(
|
||||
new Response($this->createStreamWithContent('<body>No title</body>'), 200, ['Content-Type' => 'text/html']),
|
||||
);
|
||||
$this->options->autoResolveTitles = true;
|
||||
|
||||
$result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);
|
||||
|
||||
self::assertNull($result);
|
||||
$request->shouldHaveBeenCalledOnce();
|
||||
}
|
||||
|
||||
private function respWithTitle(): Response
|
||||
{
|
||||
$body = new Stream('php://temp', 'wr');
|
||||
$body->write('<title data-foo="bar"> Resolved title</title>');
|
||||
$body = $this->createStreamWithContent('<title data-foo="bar"> Resolved title</title>');
|
||||
return new Response($body, 200, ['Content-Type' => 'TEXT/html; charset=utf-8']);
|
||||
}
|
||||
|
||||
return new Response($body);
|
||||
private function createStreamWithContent(string $content): Stream
|
||||
{
|
||||
$body = new Stream('php://temp', 'wr');
|
||||
$body->write($content);
|
||||
$body->rewind();
|
||||
|
||||
return $body;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue