Ensured URL validation is done via HEAD method when the title does not need to be resolved

This commit is contained in:
Alejandro Celaya 2022-05-01 09:51:15 +02:00
parent bd495adf22
commit eea76999b2
2 changed files with 30 additions and 11 deletions

View file

@ -11,6 +11,7 @@ use GuzzleHttp\RequestOptions;
use Psr\Http\Message\ResponseInterface; use Psr\Http\Message\ResponseInterface;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException; use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Options\UrlShortenerOptions; use Shlinkio\Shlink\Core\Options\UrlShortenerOptions;
use Throwable;
use function preg_match; use function preg_match;
use function trim; use function trim;
@ -36,7 +37,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return; return;
} }
$this->validateUrlAndGetResponse($url, true); $this->validateUrlAndGetResponse($url);
} }
public function validateUrlWithTitle(string $url, bool $doValidate): ?string public function validateUrlWithTitle(string $url, bool $doValidate): ?string
@ -45,8 +46,13 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return null; return null;
} }
$response = $this->validateUrlAndGetResponse($url, $doValidate); if (! $this->options->autoResolveTitles()) {
if ($response === null || ! $this->options->autoResolveTitles()) { $this->validateUrlAndGetResponse($url, self::METHOD_HEAD);
return null;
}
$response = $doValidate ? $this->validateUrlAndGetResponse($url) : $this->getResponse($url);
if ($response === null) {
return null; return null;
} }
@ -55,20 +61,29 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return isset($matches[1]) ? trim($matches[1]) : null; return isset($matches[1]) ? trim($matches[1]) : null;
} }
private function validateUrlAndGetResponse(string $url, bool $throwOnError): ?ResponseInterface /**
* @param self::METHOD_GET|self::METHOD_HEAD $method
* @throws InvalidUrlException
*/
private function validateUrlAndGetResponse(string $url, string $method = self::METHOD_GET): ResponseInterface
{ {
try { try {
return $this->httpClient->request(self::METHOD_GET, $url, [ return $this->httpClient->request($method, $url, [
RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS], RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS],
RequestOptions::IDN_CONVERSION => true, RequestOptions::IDN_CONVERSION => true,
// Making the request with a browser's user agent makes the validation closer to a real user // Making the request with a browser's user agent makes the validation closer to a real user
RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT], RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT],
]); ]);
} catch (GuzzleException $e) { } catch (GuzzleException $e) {
if ($throwOnError) { throw InvalidUrlException::fromUrl($url, $e);
throw InvalidUrlException::fromUrl($url, $e); }
} }
private function getResponse(string $url): ?ResponseInterface
{
try {
return $this->validateUrlAndGetResponse($url);
} catch (Throwable) {
return null; return null;
} }
} }

View file

@ -107,7 +107,9 @@ class UrlValidatorTest extends TestCase
/** @test */ /** @test */
public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsDisabledAndValidationIsEnabled(): void public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsDisabledAndValidationIsEnabled(): void
{ {
$request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); $request = $this->httpClient->request(RequestMethodInterface::METHOD_HEAD, Argument::cetera())->willReturn(
$this->respWithTitle(),
);
$this->options->autoResolveTitles = false; $this->options->autoResolveTitles = false;
$result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);
@ -119,7 +121,9 @@ class UrlValidatorTest extends TestCase
/** @test */ /** @test */
public function validateUrlWithTitleResolvesTitleWhenAutoResolutionIsEnabled(): void public function validateUrlWithTitleResolvesTitleWhenAutoResolutionIsEnabled(): void
{ {
$request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn(
$this->respWithTitle(),
);
$this->options->autoResolveTitles = true; $this->options->autoResolveTitles = true;
$result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);
@ -131,7 +135,7 @@ class UrlValidatorTest extends TestCase
private function respWithTitle(): Response private function respWithTitle(): Response
{ {
$body = new Stream('php://temp', 'wr'); $body = new Stream('php://temp', 'wr');
$body->write('<title> Resolved title</title>'); $body->write('<title data-foo="bar"> Resolved title</title>');
return new Response($body); return new Response($body);
} }