Merge pull request #2107 from acelaya-forks/feature/robots-allow-all
Add option to allow all URLs to be crawlable via robots.txt
Commit 59fa088975: 8 changed files with 72 additions and 14 deletions.
CHANGELOG.md (17 lines changed)

```diff
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org).
 
+## [Unreleased]
+### Added
+* [#2018](https://github.com/shlinkio/shlink/issues/2018) Add option to allow all short URLs to be unconditionally crawlable in robots.txt, via `ROBOTS_ALLOW_ALL_SHORT_URLS=true` env var, or config options.
+
+### Changed
+* *Nothing*
+
+### Deprecated
+* *Nothing*
+
+### Removed
+* *Nothing*
+
+### Fixed
+* *Nothing*
+
+
 ## [4.1.0] - 2024-04-14
 ### Added
 * [#1330](https://github.com/shlinkio/shlink/issues/1330) All visit-related endpoints now expose the `visitedUrl` prop for any visit.
```
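The "config options" route mentioned in the changelog entry maps to the new `robots.allow-all-short-urls` config key introduced further down in this PR (see `config/autoload/robots.global.php`). A minimal sketch of a config-file override, assuming Shlink's usual merged-config loading; the file location used here is hypothetical:

```php
<?php

declare(strict_types=1);

// Hypothetical local override (file path is illustrative): forces all short
// URLs to be crawlable, equivalent to setting ROBOTS_ALLOW_ALL_SHORT_URLS=true.
return [
    'robots' => [
        'allow-all-short-urls' => true,
    ],
];
```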
composer.json

```diff
@@ -47,7 +47,7 @@
         "shlinkio/shlink-config": "^3.0",
         "shlinkio/shlink-event-dispatcher": "^4.1",
         "shlinkio/shlink-importer": "^5.3.2",
-        "shlinkio/shlink-installer": "^9.1",
+        "shlinkio/shlink-installer": "dev-develop#11e66d8 as 9.2",
         "shlinkio/shlink-ip-geolocation": "^4.0",
         "shlinkio/shlink-json": "^1.1",
         "spiral/roadrunner": "^2023.3",
```
Installer config (the new option is registered with shlink-installer):

```diff
@@ -45,6 +45,7 @@ return [
     Option\UrlShortener\EnableMultiSegmentSlugsConfigOption::class,
     Option\UrlShortener\EnableTrailingSlashConfigOption::class,
     Option\UrlShortener\ShortUrlModeConfigOption::class,
+    Option\UrlShortener\RobotsAllowAllShortUrlsConfigOption::class,
     Option\Tracking\IpAnonymizationConfigOption::class,
     Option\Tracking\OrphanVisitsTrackingConfigOption::class,
     Option\Tracking\DisableTrackParamConfigOption::class,
```
config/autoload/robots.global.php (new file, 13 lines)

```diff
@@ -0,0 +1,13 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Shlinkio\Shlink\Core;
+
+return [
+
+    'robots' => [
+        'allow-all-short-urls' => (bool) Config\EnvVars::ROBOTS_ALLOW_ALL_SHORT_URLS->loadFromEnv(false),
+    ],
+
+];
```
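The `loadFromEnv(false)` call plus `(bool)` cast means the flag defaults to `false` unless the environment variable is set. Shlink's actual `EnvVars::loadFromEnv()` implementation is not part of this diff; the standalone sketch below only illustrates the call shape, with the parsing details assumed:

```php
<?php

declare(strict_types=1);

// Standalone sketch only: Shlink's real EnvVars enum (extended later in this
// PR) may parse values differently. This just mirrors the usage seen in
// robots.global.php above.
enum EnvVars: string
{
    case ROBOTS_ALLOW_ALL_SHORT_URLS = 'ROBOTS_ALLOW_ALL_SHORT_URLS';

    public function loadFromEnv(mixed $default = null): mixed
    {
        $raw = getenv($this->value);
        if ($raw === false) {
            return $default; // variable not set -> fall back to the default
        }

        // Normalize common boolean spellings ("true", "1", "on", ...).
        // Assumed behavior; the real implementation may differ.
        return filter_var($raw, FILTER_VALIDATE_BOOL, FILTER_NULL_ON_FAILURE) ?? $raw;
    }
}

var_dump((bool) EnvVars::ROBOTS_ALLOW_ALL_SHORT_URLS->loadFromEnv(false)); // bool(false) when unset

putenv('ROBOTS_ALLOW_ALL_SHORT_URLS=true');
var_dump((bool) EnvVars::ROBOTS_ALLOW_ALL_SHORT_URLS->loadFromEnv(false)); // bool(true)
```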
Container wiring (RobotsAction now receives the config flag as its second constructor dependency):

```diff
@@ -189,7 +189,7 @@ return [
         'Logger_Shlink',
         Options\QrCodeOptions::class,
     ],
-    Action\RobotsAction::class => [Crawling\CrawlingHelper::class],
+    Action\RobotsAction::class => [Crawling\CrawlingHelper::class, 'config.robots.allow-all-short-urls'],
 
     ShortUrl\Resolver\PersistenceShortUrlRelationResolver::class => [
         'em',
```
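Dependency names like `'config.robots.allow-all-short-urls'` are resolved against the merged config array, each dot descending one level. The function below is a sketch of that convention under those assumptions, not Shlink's actual container code:

```php
<?php

declare(strict_types=1);

// Sketch of dot-notation config resolution, as used by the
// 'config.robots.allow-all-short-urls' dependency above. The function name
// and details are assumptions for illustration.
function resolveConfigService(array $mergedConfig, string $serviceName): mixed
{
    // Drop the leading 'config' segment, then walk the remaining keys.
    $keys = explode('.', $serviceName);
    array_shift($keys);

    $value = $mergedConfig;
    foreach ($keys as $key) {
        $value = $value[$key];
    }

    return $value;
}

$mergedConfig = ['robots' => ['allow-all-short-urls' => true]];
var_dump(resolveConfigService($mergedConfig, 'config.robots.allow-all-short-urls')); // bool(true)
```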
RobotsAction (the class becomes readonly, takes the new flag, and short-circuits before listing individual short codes when it is enabled):

```diff
@@ -15,9 +15,9 @@ use function sprintf;
 
 use const PHP_EOL;
 
-class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
+readonly class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
 {
-    public function __construct(private readonly CrawlingHelperInterface $crawlingHelper)
+    public function __construct(private CrawlingHelperInterface $crawlingHelper, private bool $allowAllShortUrls)
     {
     }
 
```

```diff
@@ -37,6 +37,12 @@ class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
 
         ROBOTS;
 
+        if ($this->allowAllShortUrls) {
+            // Disallow rest URLs, but allow all short codes
+            yield 'Disallow: /rest/';
+            return;
+        }
+
         $shortCodes = $this->crawlingHelper->listCrawlableShortCodes();
         foreach ($shortCodes as $shortCode) {
             yield sprintf('Allow: /%s%s', $shortCode, PHP_EOL);
```
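Pieced together from the two hunks above and the expected bodies in RobotsActionTest further down, the generator behaves as sketched here. The free-standing function name and the demo loop are illustrative assumptions; the header text comes from the unchanged parts of the action, as reflected in the test expectations:

```php
<?php

declare(strict_types=1);

// Illustrative reassembly of the robots.txt line generator; not the verbatim
// RobotsAction::handle() body, whose unchanged parts are outside this diff.
function buildRobotsLines(iterable $crawlableShortCodes, bool $allowAllShortUrls): iterable
{
    // Header taken from the expected outputs in RobotsActionTest.
    yield <<<ROBOTS
    # For more information about the robots.txt standard, see:
    # https://www.robotstxt.org/orig.html

    User-agent: *

    ROBOTS;

    if ($allowAllShortUrls) {
        // Disallow rest URLs, but allow all short codes
        yield 'Disallow: /rest/';
        return;
    }

    foreach ($crawlableShortCodes as $shortCode) {
        yield sprintf('Allow: /%s%s', $shortCode, PHP_EOL);
    }

    yield 'Disallow: /';
}

// With the flag enabled, the crawling helper is never consulted and every
// short URL stays crawlable; only the REST API is disallowed.
foreach (buildRobotsLines([], allowAllShortUrls: true) as $line) {
    echo $line, PHP_EOL;
}
```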
EnvVars (adds the new enum case; TIMEZONE is reordered below it):

```diff
@@ -69,8 +69,9 @@ enum EnvVars: string
     case DEFAULT_DOMAIN = 'DEFAULT_DOMAIN';
     case AUTO_RESOLVE_TITLES = 'AUTO_RESOLVE_TITLES';
     case REDIRECT_APPEND_EXTRA_PATH = 'REDIRECT_APPEND_EXTRA_PATH';
-    case TIMEZONE = 'TIMEZONE';
     case MULTI_SEGMENT_SLUGS_ENABLED = 'MULTI_SEGMENT_SLUGS_ENABLED';
+    case ROBOTS_ALLOW_ALL_SHORT_URLS = 'ROBOTS_ALLOW_ALL_SHORT_URLS';
+    case TIMEZONE = 'TIMEZONE';
     case MEMORY_LIMIT = 'MEMORY_LIMIT';
 
     public function loadFromEnv(mixed $default = null): mixed
```
RobotsActionTest (the action is now built per test case, and the data provider gains the allow-all flag plus two new scenarios):

```diff
@@ -14,24 +14,25 @@ use Shlinkio\Shlink\Core\Crawling\CrawlingHelperInterface;
 
 class RobotsActionTest extends TestCase
 {
-    private RobotsAction $action;
     private MockObject & CrawlingHelperInterface $helper;
 
     protected function setUp(): void
     {
         $this->helper = $this->createMock(CrawlingHelperInterface::class);
-        $this->action = new RobotsAction($this->helper);
     }
 
     #[Test, DataProvider('provideShortCodes')]
-    public function buildsRobotsLinesFromCrawlableShortCodes(array $shortCodes, string $expected): void
-    {
+    public function buildsRobotsLinesFromCrawlableShortCodes(
+        array $shortCodes,
+        bool $allowAllShortUrls,
+        string $expected,
+    ): void {
         $this->helper
-            ->expects($this->once())
+            ->expects($allowAllShortUrls ? $this->never() : $this->once())
             ->method('listCrawlableShortCodes')
             ->willReturn($shortCodes);
 
-        $response = $this->action->handle(ServerRequestFactory::fromGlobals());
+        $response = $this->action($allowAllShortUrls)->handle(ServerRequestFactory::fromGlobals());
 
         self::assertEquals(200, $response->getStatusCode());
         self::assertEquals($expected, $response->getBody()->__toString());
```

```diff
@@ -40,7 +41,7 @@ class RobotsActionTest extends TestCase
 
     public static function provideShortCodes(): iterable
     {
-        yield 'three short codes' => [['foo', 'bar', 'baz'], <<<ROBOTS
+        yield 'three short codes' => [['foo', 'bar', 'baz'], false, <<<ROBOTS
         # For more information about the robots.txt standard, see:
         # https://www.robotstxt.org/orig.html
 
```

```diff
@@ -50,7 +51,7 @@ class RobotsActionTest extends TestCase
         Allow: /baz
         Disallow: /
         ROBOTS];
-        yield 'five short codes' => [['foo', 'bar', 'some', 'thing', 'baz'], <<<ROBOTS
+        yield 'five short codes' => [['foo', 'bar', 'some', 'thing', 'baz'], false, <<<ROBOTS
         # For more information about the robots.txt standard, see:
         # https://www.robotstxt.org/orig.html
 
```

```diff
@@ -62,12 +63,31 @@ class RobotsActionTest extends TestCase
         Allow: /baz
         Disallow: /
         ROBOTS];
-        yield 'no short codes' => [[], <<<ROBOTS
+        yield 'no short codes' => [[], false, <<<ROBOTS
         # For more information about the robots.txt standard, see:
         # https://www.robotstxt.org/orig.html
 
         User-agent: *
         Disallow: /
         ROBOTS];
+        yield 'three short codes and allow all short urls' => [['foo', 'bar', 'some'], true, <<<ROBOTS
+        # For more information about the robots.txt standard, see:
+        # https://www.robotstxt.org/orig.html
+
+        User-agent: *
+        Disallow: /rest/
+        ROBOTS];
+        yield 'no short codes and allow all short urls' => [[], true, <<<ROBOTS
+        # For more information about the robots.txt standard, see:
+        # https://www.robotstxt.org/orig.html
+
+        User-agent: *
+        Disallow: /rest/
+        ROBOTS];
+    }
+
+    private function action(bool $allowAllShortUrls = false): RobotsAction
+    {
+        return new RobotsAction($this->helper, allowAllShortUrls: $allowAllShortUrls);
     }
 }
```