diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 385066e6..1088e0f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: none - run: composer install --no-interaction --prefer-dist - run: composer ${{ matrix.command }} @@ -45,7 +45,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: pcov ini-values: pcov.directory=module - run: composer install --no-interaction --prefer-dist @@ -80,7 +80,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0, pdo_sqlsrv-5.10.0 + extensions: openswoole-4.11.1, pdo_sqlsrv-5.10.0 coverage: pcov ini-values: pcov.directory=module - run: composer install --no-interaction --prefer-dist @@ -115,7 +115,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: pcov ini-values: pcov.directory=module - run: composer install --no-interaction --prefer-dist diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index f872ebee..c4c10e3f 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -20,7 +20,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 - if: ${{ matrix.swoole == 'yes' }} run: ./build.sh ${GITHUB_REF#refs/tags/v} - if: ${{ matrix.swoole == 'no' }} diff --git a/.github/workflows/publish-swagger-spec.yml b/.github/workflows/publish-swagger-spec.yml index 51907814..c3bbcc96 100644 --- a/.github/workflows/publish-swagger-spec.yml +++ b/.github/workflows/publish-swagger-spec.yml @@ -23,7 +23,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: none - run: composer install --no-interaction --prefer-dist - run: composer swagger:inline diff --git a/CHANGELOG.md b/CHANGELOG.md index c29a817e..901d5e60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this * *Nothing* +## [3.1.1] - 2022-05-09 +### Added +* *Nothing* + +### Changed +* [#1444](https://github.com/shlinkio/shlink/issues/1444) Updated docker image to openswoole 4.11.1, in an attempt to fix error. + +### Deprecated +* *Nothing* + +### Removed +* *Nothing* + +### Fixed +* [#1439](https://github.com/shlinkio/shlink/issues/1439) Fixed crash when trying to auto-resolve titles for URLs which serve large binary files. + + ## [3.1.0] - 2022-04-23 ### Added * [#1294](https://github.com/shlinkio/shlink/issues/1294) Allowed to provide a specific domain when importing URLs from YOURLS. diff --git a/Dockerfile b/Dockerfile index 0b3ee781..940364a3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM php:8.1.5-alpine3.15 as base ARG SHLINK_VERSION=latest ENV SHLINK_VERSION ${SHLINK_VERSION} -ENV OPENSWOOLE_VERSION 4.11.0 +ENV OPENSWOOLE_VERSION 4.11.1 ENV PDO_SQLSRV_VERSION 5.10.0 ENV MS_ODBC_SQL_VERSION 17.5.2.2 ENV LC_ALL "C" diff --git a/composer.json b/composer.json index 96a0c639..57ff443a 100644 --- a/composer.json +++ b/composer.json @@ -65,7 +65,7 @@ "devster/ubench": "^2.1", "dms/phpunit-arraysubset-asserts": "^0.3.0", "infection/infection": "^0.26.5", - "openswoole/ide-helper": "~4.11.0", + "openswoole/ide-helper": "~4.11.1", "phpspec/prophecy-phpunit": "^2.0", "phpstan/phpstan": "^1.2", "phpstan/phpstan-doctrine": "^1.0", @@ -139,7 +139,7 @@ "test:api": "bin/test/run-api-tests.sh", "test:api:ci": "GENERATE_COVERAGE=yes composer test:api", "infect:ci:base": "infection --threads=4 --log-verbosity=default --only-covered --only-covering-test-cases --skip-initial-tests", - "infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=85", + "infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=84", "infect:ci:db": "@infect:ci:base --coverage=build/coverage-db --min-msi=95 --configuration=infection-db.json", "infect:ci:api": "@infect:ci:base --coverage=build/coverage-api --min-msi=80 --configuration=infection-api.json", "infect:ci": "@parallel infect:ci:unit infect:ci:db infect:ci:api", diff --git a/data/infra/swoole.Dockerfile b/data/infra/swoole.Dockerfile index b7f26c7c..144eeb08 100644 --- a/data/infra/swoole.Dockerfile +++ b/data/infra/swoole.Dockerfile @@ -3,7 +3,7 @@ MAINTAINER Alejandro Celaya <alejandro@alejandrocelaya.com> ENV APCU_VERSION 5.1.21 ENV INOTIFY_VERSION 3.0.0 -ENV OPENSWOOLE_VERSION 4.11.0 +ENV OPENSWOOLE_VERSION 4.11.1 ENV PDO_SQLSRV_VERSION 5.10.0 ENV MS_ODBC_SQL_VERSION 17.5.2.2 diff --git a/module/Core/src/Util/UrlValidator.php b/module/Core/src/Util/UrlValidator.php index da061c0d..6bd8d76f 100644 --- a/module/Core/src/Util/UrlValidator.php +++ b/module/Core/src/Util/UrlValidator.php @@ -11,8 +11,12 @@ use GuzzleHttp\RequestOptions; use Psr\Http\Message\ResponseInterface; use Shlinkio\Shlink\Core\Exception\InvalidUrlException; use Shlinkio\Shlink\Core\Options\UrlShortenerOptions; +use Throwable; use function preg_match; +use function str_contains; +use function str_starts_with; +use function strtolower; use function trim; use const Shlinkio\Shlink\TITLE_TAG_VALUE; @@ -36,7 +40,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return; } - $this->validateUrlAndGetResponse($url, true); + $this->validateUrlAndGetResponse($url); } public function validateUrlWithTitle(string $url, bool $doValidate): ?string @@ -45,30 +49,55 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return null; } - $response = $this->validateUrlAndGetResponse($url, $doValidate); - if ($response === null || ! $this->options->autoResolveTitles()) { + if (! $this->options->autoResolveTitles()) { + $this->validateUrlAndGetResponse($url, self::METHOD_HEAD); return null; } - $body = $response->getBody()->__toString(); - preg_match(TITLE_TAG_VALUE, $body, $matches); + $response = $doValidate ? $this->validateUrlAndGetResponse($url) : $this->getResponse($url); + if ($response === null) { + return null; + } + + $contentType = strtolower($response->getHeaderLine('Content-Type')); + if (! str_starts_with($contentType, 'text/html')) { + return null; + } + + $collectedBody = ''; + $body = $response->getBody(); + // With streaming enabled, we can walk the body until the </title> tag is found, and then stop + while (! str_contains($collectedBody, '</title>') && ! $body->eof()) { + $collectedBody .= $body->read(1024); + } + preg_match(TITLE_TAG_VALUE, $collectedBody, $matches); return isset($matches[1]) ? trim($matches[1]) : null; } - private function validateUrlAndGetResponse(string $url, bool $throwOnError): ?ResponseInterface + /** + * @param self::METHOD_GET|self::METHOD_HEAD $method + * @throws InvalidUrlException + */ + private function validateUrlAndGetResponse(string $url, string $method = self::METHOD_GET): ResponseInterface { try { - return $this->httpClient->request(self::METHOD_GET, $url, [ + return $this->httpClient->request($method, $url, [ RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS], RequestOptions::IDN_CONVERSION => true, // Making the request with a browser's user agent makes the validation closer to a real user RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT], + RequestOptions::STREAM => true, // This ensures large files are not fully downloaded if not needed ]); } catch (GuzzleException $e) { - if ($throwOnError) { - throw InvalidUrlException::fromUrl($url, $e); - } + throw InvalidUrlException::fromUrl($url, $e); + } + } + private function getResponse(string $url): ?ResponseInterface + { + try { + return $this->validateUrlAndGetResponse($url); + } catch (Throwable) { return null; } } diff --git a/module/Core/test/Util/UrlValidatorTest.php b/module/Core/test/Util/UrlValidatorTest.php index 55cb4d86..7e1f9a1b 100644 --- a/module/Core/test/Util/UrlValidatorTest.php +++ b/module/Core/test/Util/UrlValidatorTest.php @@ -107,7 +107,9 @@ class UrlValidatorTest extends TestCase /** @test */ public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsDisabledAndValidationIsEnabled(): void { - $request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); + $request = $this->httpClient->request(RequestMethodInterface::METHOD_HEAD, Argument::cetera())->willReturn( + $this->respWithTitle(), + ); $this->options->autoResolveTitles = false; $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); @@ -119,7 +121,9 @@ class UrlValidatorTest extends TestCase /** @test */ public function validateUrlWithTitleResolvesTitleWhenAutoResolutionIsEnabled(): void { - $request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + $this->respWithTitle(), + ); $this->options->autoResolveTitles = true; $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); @@ -128,11 +132,46 @@ class UrlValidatorTest extends TestCase $request->shouldHaveBeenCalledOnce(); } + /** @test */ + public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndReturnedContentTypeIsInvalid(): void + { + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + new Response('php://memory', 200, ['Content-Type' => 'application/octet-stream']), + ); + $this->options->autoResolveTitles = true; + + $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); + + self::assertNull($result); + $request->shouldHaveBeenCalledOnce(); + } + + /** @test */ + public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndBodyDoesNotContainTitle(): void + { + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + new Response($this->createStreamWithContent('<body>No title</body>'), 200, ['Content-Type' => 'text/html']), + ); + $this->options->autoResolveTitles = true; + + $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); + + self::assertNull($result); + $request->shouldHaveBeenCalledOnce(); + } + private function respWithTitle(): Response { - $body = new Stream('php://temp', 'wr'); - $body->write('<title> Resolved title</title>'); + $body = $this->createStreamWithContent('<title data-foo="bar"> Resolved title</title>'); + return new Response($body, 200, ['Content-Type' => 'TEXT/html; charset=utf-8']); + } - return new Response($body); + private function createStreamWithContent(string $content): Stream + { + $body = new Stream('php://temp', 'wr'); + $body->write($content); + $body->rewind(); + + return $body; } }