From bd495adf22a05c37e70efc48e00d55d25afc215d Mon Sep 17 00:00:00 2001 From: Alejandro Celaya <alejandrocelaya@gmail.com> Date: Sun, 1 May 2022 08:40:20 +0200 Subject: [PATCH 1/4] Set SemVer versions for some shlink package versions --- composer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index 2446aace..1179e136 100644 --- a/composer.json +++ b/composer.json @@ -50,8 +50,8 @@ "shlinkio/shlink-common": "^4.4", "shlinkio/shlink-config": "^1.6", "shlinkio/shlink-event-dispatcher": "^2.3", - "shlinkio/shlink-importer": "dev-main#af0e05e as 3.0", - "shlinkio/shlink-installer": "dev-develop#fbbc8f5 as 7.1", + "shlinkio/shlink-importer": "^3.0", + "shlinkio/shlink-installer": "^7.1", "shlinkio/shlink-ip-geolocation": "^2.2", "symfony/console": "^6.0", "symfony/filesystem": "^6.0", From eea76999b2e89b21a9d69b2300cdd61af63295af Mon Sep 17 00:00:00 2001 From: Alejandro Celaya <alejandrocelaya@gmail.com> Date: Sun, 1 May 2022 09:51:15 +0200 Subject: [PATCH 2/4] Ensured URL validation is doe via HEAD method when the title does not need to be resolved --- module/Core/src/Util/UrlValidator.php | 31 ++++++++++++++++------ module/Core/test/Util/UrlValidatorTest.php | 10 ++++--- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/module/Core/src/Util/UrlValidator.php b/module/Core/src/Util/UrlValidator.php index da061c0d..006ca372 100644 --- a/module/Core/src/Util/UrlValidator.php +++ b/module/Core/src/Util/UrlValidator.php @@ -11,6 +11,7 @@ use GuzzleHttp\RequestOptions; use Psr\Http\Message\ResponseInterface; use Shlinkio\Shlink\Core\Exception\InvalidUrlException; use Shlinkio\Shlink\Core\Options\UrlShortenerOptions; +use Throwable; use function preg_match; use function trim; @@ -36,7 +37,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return; } - $this->validateUrlAndGetResponse($url, true); + $this->validateUrlAndGetResponse($url); } public function validateUrlWithTitle(string $url, bool $doValidate): ?string @@ -45,8 +46,13 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return null; } - $response = $this->validateUrlAndGetResponse($url, $doValidate); - if ($response === null || ! $this->options->autoResolveTitles()) { + if (! $this->options->autoResolveTitles()) { + $this->validateUrlAndGetResponse($url, self::METHOD_HEAD); + return null; + } + + $response = $doValidate ? $this->validateUrlAndGetResponse($url) : $this->getResponse($url); + if ($response === null) { return null; } @@ -55,20 +61,29 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return isset($matches[1]) ? trim($matches[1]) : null; } - private function validateUrlAndGetResponse(string $url, bool $throwOnError): ?ResponseInterface + /** + * @param self::METHOD_GET|self::METHOD_HEAD $method + * @throws InvalidUrlException + */ + private function validateUrlAndGetResponse(string $url, string $method = self::METHOD_GET): ResponseInterface { try { - return $this->httpClient->request(self::METHOD_GET, $url, [ + return $this->httpClient->request($method, $url, [ RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS], RequestOptions::IDN_CONVERSION => true, // Making the request with a browser's user agent makes the validation closer to a real user RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT], ]); } catch (GuzzleException $e) { - if ($throwOnError) { - throw InvalidUrlException::fromUrl($url, $e); - } + throw InvalidUrlException::fromUrl($url, $e); + } + } + private function getResponse(string $url): ?ResponseInterface + { + try { + return $this->validateUrlAndGetResponse($url); + } catch (Throwable) { return null; } } diff --git a/module/Core/test/Util/UrlValidatorTest.php b/module/Core/test/Util/UrlValidatorTest.php index 55cb4d86..7a943ea0 100644 --- a/module/Core/test/Util/UrlValidatorTest.php +++ b/module/Core/test/Util/UrlValidatorTest.php @@ -107,7 +107,9 @@ class UrlValidatorTest extends TestCase /** @test */ public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsDisabledAndValidationIsEnabled(): void { - $request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); + $request = $this->httpClient->request(RequestMethodInterface::METHOD_HEAD, Argument::cetera())->willReturn( + $this->respWithTitle(), + ); $this->options->autoResolveTitles = false; $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); @@ -119,7 +121,9 @@ class UrlValidatorTest extends TestCase /** @test */ public function validateUrlWithTitleResolvesTitleWhenAutoResolutionIsEnabled(): void { - $request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + $this->respWithTitle(), + ); $this->options->autoResolveTitles = true; $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); @@ -131,7 +135,7 @@ class UrlValidatorTest extends TestCase private function respWithTitle(): Response { $body = new Stream('php://temp', 'wr'); - $body->write('<title> Resolved title</title>'); + $body->write('<title data-foo="bar"> Resolved title</title>'); return new Response($body); } From 18f656fed236007e0a2d3db1fed76d4cc7275594 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya <alejandrocelaya@gmail.com> Date: Sun, 1 May 2022 11:48:20 +0200 Subject: [PATCH 3/4] Changed logic when resolving the title of a URL, to ensure only html content is tried to be downloaded, and only until the title tag has been parsed --- CHANGELOG.md | 17 +++++++++ composer.json | 2 +- module/Core/src/Util/UrlValidator.php | 18 ++++++++-- module/Core/test/Util/UrlValidatorTest.php | 41 ++++++++++++++++++++-- 4 files changed, 72 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 882bd2ed..fdaa75e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org). +## [Unreleased] +### Added +* *Nothing* + +### Changed +* *Nothing* + +### Deprecated +* *Nothing* + +### Removed +* *Nothing* + +### Fixed +* [#1439](https://github.com/shlinkio/shlink/issues/1439) Fixed crash when trying to auto-resolve titles for URLs which serve large binary files. + + ## [3.1.0] - 2022-04-23 ### Added * [#1294](https://github.com/shlinkio/shlink/issues/1294) Allowed to provide a specific domain when importing URLs from YOURLS. diff --git a/composer.json b/composer.json index 1179e136..d310a103 100644 --- a/composer.json +++ b/composer.json @@ -139,7 +139,7 @@ "test:api": "bin/test/run-api-tests.sh", "test:api:ci": "GENERATE_COVERAGE=yes composer test:api", "infect:ci:base": "infection --threads=4 --log-verbosity=default --only-covered --only-covering-test-cases --skip-initial-tests", - "infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=85", + "infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=84", "infect:ci:db": "@infect:ci:base --coverage=build/coverage-db --min-msi=95 --configuration=infection-db.json", "infect:ci:api": "@infect:ci:base --coverage=build/coverage-api --min-msi=80 --configuration=infection-api.json", "infect:ci": "@parallel infect:ci:unit infect:ci:db infect:ci:api", diff --git a/module/Core/src/Util/UrlValidator.php b/module/Core/src/Util/UrlValidator.php index 006ca372..6bd8d76f 100644 --- a/module/Core/src/Util/UrlValidator.php +++ b/module/Core/src/Util/UrlValidator.php @@ -14,6 +14,9 @@ use Shlinkio\Shlink\Core\Options\UrlShortenerOptions; use Throwable; use function preg_match; +use function str_contains; +use function str_starts_with; +use function strtolower; use function trim; use const Shlinkio\Shlink\TITLE_TAG_VALUE; @@ -56,8 +59,18 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return null; } - $body = $response->getBody()->__toString(); - preg_match(TITLE_TAG_VALUE, $body, $matches); + $contentType = strtolower($response->getHeaderLine('Content-Type')); + if (! str_starts_with($contentType, 'text/html')) { + return null; + } + + $collectedBody = ''; + $body = $response->getBody(); + // With streaming enabled, we can walk the body until the </title> tag is found, and then stop + while (! str_contains($collectedBody, '</title>') && ! $body->eof()) { + $collectedBody .= $body->read(1024); + } + preg_match(TITLE_TAG_VALUE, $collectedBody, $matches); return isset($matches[1]) ? trim($matches[1]) : null; } @@ -73,6 +86,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface RequestOptions::IDN_CONVERSION => true, // Making the request with a browser's user agent makes the validation closer to a real user RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT], + RequestOptions::STREAM => true, // This ensures large files are not fully downloaded if not needed ]); } catch (GuzzleException $e) { throw InvalidUrlException::fromUrl($url, $e); diff --git a/module/Core/test/Util/UrlValidatorTest.php b/module/Core/test/Util/UrlValidatorTest.php index 7a943ea0..7e1f9a1b 100644 --- a/module/Core/test/Util/UrlValidatorTest.php +++ b/module/Core/test/Util/UrlValidatorTest.php @@ -132,11 +132,46 @@ class UrlValidatorTest extends TestCase $request->shouldHaveBeenCalledOnce(); } + /** @test */ + public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndReturnedContentTypeIsInvalid(): void + { + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + new Response('php://memory', 200, ['Content-Type' => 'application/octet-stream']), + ); + $this->options->autoResolveTitles = true; + + $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); + + self::assertNull($result); + $request->shouldHaveBeenCalledOnce(); + } + + /** @test */ + public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndBodyDoesNotContainTitle(): void + { + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + new Response($this->createStreamWithContent('<body>No title</body>'), 200, ['Content-Type' => 'text/html']), + ); + $this->options->autoResolveTitles = true; + + $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); + + self::assertNull($result); + $request->shouldHaveBeenCalledOnce(); + } + private function respWithTitle(): Response { - $body = new Stream('php://temp', 'wr'); - $body->write('<title data-foo="bar"> Resolved title</title>'); + $body = $this->createStreamWithContent('<title data-foo="bar"> Resolved title</title>'); + return new Response($body, 200, ['Content-Type' => 'TEXT/html; charset=utf-8']); + } - return new Response($body); + private function createStreamWithContent(string $content): Stream + { + $body = new Stream('php://temp', 'wr'); + $body->write($content); + $body->rewind(); + + return $body; } } From 9ae8804095a2a765a4b986758e33a8fbbf3f85aa Mon Sep 17 00:00:00 2001 From: Alejandro Celaya <alejandrocelaya@gmail.com> Date: Mon, 9 May 2022 08:00:54 +0200 Subject: [PATCH 4/4] Updated to openswoole 4.11.1 in docker images --- .github/workflows/ci.yml | 8 ++++---- .github/workflows/publish-release.yml | 2 +- .github/workflows/publish-swagger-spec.yml | 2 +- CHANGELOG.md | 4 ++-- Dockerfile | 2 +- composer.json | 2 +- data/infra/swoole.Dockerfile | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 38ccb54c..fb51e305 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: none - run: composer install --no-interaction --prefer-dist - run: composer ${{ matrix.command }} @@ -45,7 +45,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: pcov ini-values: pcov.directory=module - run: composer install --no-interaction --prefer-dist @@ -80,7 +80,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0, pdo_sqlsrv-5.10.0 + extensions: openswoole-4.11.1, pdo_sqlsrv-5.10.0 coverage: pcov ini-values: pcov.directory=module - run: composer install --no-interaction --prefer-dist @@ -115,7 +115,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: pcov ini-values: pcov.directory=module - run: composer install --no-interaction --prefer-dist diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 6d50221a..45c48bd2 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -20,7 +20,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 - if: ${{ matrix.swoole == 'yes' }} run: ./build.sh ${GITHUB_REF#refs/tags/v} - if: ${{ matrix.swoole == 'no' }} diff --git a/.github/workflows/publish-swagger-spec.yml b/.github/workflows/publish-swagger-spec.yml index 5b97ee6c..43313920 100644 --- a/.github/workflows/publish-swagger-spec.yml +++ b/.github/workflows/publish-swagger-spec.yml @@ -23,7 +23,7 @@ jobs: with: php-version: ${{ matrix.php-version }} tools: composer - extensions: openswoole-4.11.0 + extensions: openswoole-4.11.1 coverage: none - run: composer install --no-interaction --prefer-dist - run: composer swagger:inline diff --git a/CHANGELOG.md b/CHANGELOG.md index fdaa75e6..97153daf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,12 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org). -## [Unreleased] +## [3.1.1] - 2022-05-09 ### Added * *Nothing* ### Changed -* *Nothing* +* [#1444](https://github.com/shlinkio/shlink/issues/1444) Updated docker image to openswoole 4.11.1, in an attempt to fix error. ### Deprecated * *Nothing* diff --git a/Dockerfile b/Dockerfile index 0b3ee781..940364a3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM php:8.1.5-alpine3.15 as base ARG SHLINK_VERSION=latest ENV SHLINK_VERSION ${SHLINK_VERSION} -ENV OPENSWOOLE_VERSION 4.11.0 +ENV OPENSWOOLE_VERSION 4.11.1 ENV PDO_SQLSRV_VERSION 5.10.0 ENV MS_ODBC_SQL_VERSION 17.5.2.2 ENV LC_ALL "C" diff --git a/composer.json b/composer.json index d310a103..4268f85b 100644 --- a/composer.json +++ b/composer.json @@ -65,7 +65,7 @@ "devster/ubench": "^2.1", "dms/phpunit-arraysubset-asserts": "^0.3.0", "infection/infection": "^0.26.5", - "openswoole/ide-helper": "~4.11.0", + "openswoole/ide-helper": "~4.11.1", "phpspec/prophecy-phpunit": "^2.0", "phpstan/phpstan": "^1.2", "phpstan/phpstan-doctrine": "^1.0", diff --git a/data/infra/swoole.Dockerfile b/data/infra/swoole.Dockerfile index b7f26c7c..144eeb08 100644 --- a/data/infra/swoole.Dockerfile +++ b/data/infra/swoole.Dockerfile @@ -3,7 +3,7 @@ MAINTAINER Alejandro Celaya <alejandro@alejandrocelaya.com> ENV APCU_VERSION 5.1.21 ENV INOTIFY_VERSION 3.0.0 -ENV OPENSWOOLE_VERSION 4.11.0 +ENV OPENSWOOLE_VERSION 4.11.1 ENV PDO_SQLSRV_VERSION 5.10.0 ENV MS_ODBC_SQL_VERSION 17.5.2.2