Ensured URL validation is done via HEAD method when the title does not need to be resolved

This commit is contained in:
Alejandro Celaya 2022-05-01 09:51:15 +02:00
parent bd495adf22
commit eea76999b2
2 changed files with 30 additions and 11 deletions

View file

@ -11,6 +11,7 @@ use GuzzleHttp\RequestOptions;
use Psr\Http\Message\ResponseInterface;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Options\UrlShortenerOptions;
use Throwable;
use function preg_match;
use function trim;
@ -36,7 +37,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return;
}
$this->validateUrlAndGetResponse($url, true);
$this->validateUrlAndGetResponse($url);
}
public function validateUrlWithTitle(string $url, bool $doValidate): ?string
@ -45,8 +46,13 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return null;
}
$response = $this->validateUrlAndGetResponse($url, $doValidate);
if ($response === null || ! $this->options->autoResolveTitles()) {
if (! $this->options->autoResolveTitles()) {
$this->validateUrlAndGetResponse($url, self::METHOD_HEAD);
return null;
}
$response = $doValidate ? $this->validateUrlAndGetResponse($url) : $this->getResponse($url);
if ($response === null) {
return null;
}
@ -55,20 +61,29 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return isset($matches[1]) ? trim($matches[1]) : null;
}
private function validateUrlAndGetResponse(string $url, bool $throwOnError): ?ResponseInterface
/**
* @param self::METHOD_GET|self::METHOD_HEAD $method
* @throws InvalidUrlException
*/
private function validateUrlAndGetResponse(string $url, string $method = self::METHOD_GET): ResponseInterface
{
try {
return $this->httpClient->request(self::METHOD_GET, $url, [
return $this->httpClient->request($method, $url, [
RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS],
RequestOptions::IDN_CONVERSION => true,
// Making the request with a browser's user agent makes the validation closer to a real user
RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT],
]);
} catch (GuzzleException $e) {
if ($throwOnError) {
throw InvalidUrlException::fromUrl($url, $e);
}
throw InvalidUrlException::fromUrl($url, $e);
}
}
/**
 * Best-effort wrapper around validateUrlAndGetResponse().
 *
 * Delegates to validateUrlAndGetResponse() passing only the URL, and converts
 * any error raised by that call into a null result instead of propagating it.
 *
 * @return ResponseInterface|null The response, or null if anything was thrown
 */
private function getResponse(string $url): ?ResponseInterface
{
    try {
        $response = $this->validateUrlAndGetResponse($url);
    } catch (Throwable) {
        // Failures are intentionally not reported here; the caller handles null
        return null;
    }

    return $response;
}

View file

@ -107,7 +107,9 @@ class UrlValidatorTest extends TestCase
/** @test */
public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsDisabledAndValidationIsEnabled(): void
{
$request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle());
$request = $this->httpClient->request(RequestMethodInterface::METHOD_HEAD, Argument::cetera())->willReturn(
$this->respWithTitle(),
);
$this->options->autoResolveTitles = false;
$result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);
@ -119,7 +121,9 @@ class UrlValidatorTest extends TestCase
/** @test */
public function validateUrlWithTitleResolvesTitleWhenAutoResolutionIsEnabled(): void
{
$request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle());
$request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn(
$this->respWithTitle(),
);
$this->options->autoResolveTitles = true;
$result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);
@ -131,7 +135,7 @@ class UrlValidatorTest extends TestCase
private function respWithTitle(): Response
{
$body = new Stream('php://temp', 'wr');
$body->write('<title> Resolved title</title>');
$body->write('<title data-foo="bar"> Resolved title</title>');
return new Response($body);
}