Merge pull request #761 from acelaya-forks/feature/optional-obfuscation

Feature/optional obfuscation
This commit is contained in:
Alejandro Celaya 2020-05-08 16:03:11 +02:00 committed by GitHub
commit ae060f3b13
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 63 additions and 36 deletions

View file

@ -26,6 +26,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this
Now, if the `withStats=true` query param is provided, the response payload will include a new `stats` property which is a list with the amount of short URLs and visits for every tag. Now, if the `withStats=true` query param is provided, the response payload will include a new `stats` property which is a list with the amount of short URLs and visits for every tag.
* [#640](https://github.com/shlinkio/shlink/issues/640) Allowed to optionally disable visitors' IP address anonymization. This will make Shlink no longer be GDPR-compliant, but it's OK if you only plan to share your URLs in countries without this regulation.
#### Changed #### Changed
* [#692](https://github.com/shlinkio/shlink/issues/692) Drastically improved performance when loading visits. Especially noticeable when loading big result sets. * [#692](https://github.com/shlinkio/shlink/issues/692) Drastically improved performance when loading visits. Especially noticeable when loading big result sets.

View file

@ -51,7 +51,7 @@
"shlinkio/shlink-common": "dev-master#e659cf9d9b5b3b131419e2f55f2e595f562baafc as 3.1.0", "shlinkio/shlink-common": "dev-master#e659cf9d9b5b3b131419e2f55f2e595f562baafc as 3.1.0",
"shlinkio/shlink-config": "^1.0", "shlinkio/shlink-config": "^1.0",
"shlinkio/shlink-event-dispatcher": "^1.4", "shlinkio/shlink-event-dispatcher": "^1.4",
"shlinkio/shlink-installer": "dev-master#dae6644587d0c1c59ca773722531551b9f436786 as 5.0.0", "shlinkio/shlink-installer": "dev-master#50be18de1e505d2609d96c6cc86571b1b1ca7b57 as 5.0.0",
"shlinkio/shlink-ip-geolocation": "^1.4", "shlinkio/shlink-ip-geolocation": "^1.4",
"symfony/console": "^5.0", "symfony/console": "^5.0",
"symfony/filesystem": "^5.0", "symfony/filesystem": "^5.0",

View file

@ -36,6 +36,7 @@ return [
Option\Mercure\MercureInternalUrlConfigOption::class, Option\Mercure\MercureInternalUrlConfigOption::class,
Option\Mercure\MercureJwtSecretConfigOption::class, Option\Mercure\MercureJwtSecretConfigOption::class,
Option\UrlShortener\GeoLiteLicenseKeyConfigOption::class, Option\UrlShortener\GeoLiteLicenseKeyConfigOption::class,
Option\UrlShortener\IpAnonymizationConfigOption::class,
], ],
'installation_commands' => [ 'installation_commands' => [

View file

@ -12,6 +12,7 @@ return [
'hostname' => '', 'hostname' => '',
], ],
'validate_url' => false, 'validate_url' => false,
'anonymize_remote_addr' => true,
'visits_webhooks' => [], 'visits_webhooks' => [],
'default_short_codes_length' => DEFAULT_SHORT_CODES_LENGTH, 'default_short_codes_length' => DEFAULT_SHORT_CODES_LENGTH,
], ],

View file

@ -168,12 +168,12 @@ This is the complete list of supported env vars:
* `TASK_WORKER_NUM`: The amount of concurrent background tasks this shlink instance will be able to execute. Defaults to 16. * `TASK_WORKER_NUM`: The amount of concurrent background tasks this shlink instance will be able to execute. Defaults to 16.
* `VISITS_WEBHOOKS`: A comma-separated list of URLs that will receive a `POST` request when a short URL receives a visit. * `VISITS_WEBHOOKS`: A comma-separated list of URLs that will receive a `POST` request when a short URL receives a visit.
* `DEFAULT_SHORT_CODES_LENGTH`: The length you want generated short codes to have. It defaults to 5 and has to be at least 4, so any value smaller than that will fall back to 4. * `DEFAULT_SHORT_CODES_LENGTH`: The length you want generated short codes to have. It defaults to 5 and has to be at least 4, so any value smaller than that will fall back to 4.
* `GEOLITE_LICENSE_KEY`: The license key used to download new GeoLite2 database files. This is not mandatory, as a default license key is provided, but it is **strongly recommended** that you provide your own. Go to [https://shlink.io/documentation/geolite-license-key](https://shlink.io/documentation/geolite-license-key) to know how to generate it.
* `REDIS_SERVERS`: A comma-separated list of redis servers where Shlink locks are stored (locks are used to prevent some operations from being run more than once in parallel). * `REDIS_SERVERS`: A comma-separated list of redis servers where Shlink locks are stored (locks are used to prevent some operations from being run more than once in parallel).
* `MERCURE_PUBLIC_HUB_URL`: The public URL of a mercure hub server to which Shlink will send updates. This URL will also be served to consumers that want to subscribe to those updates. * `MERCURE_PUBLIC_HUB_URL`: The public URL of a mercure hub server to which Shlink will send updates. This URL will also be served to consumers that want to subscribe to those updates.
* `MERCURE_INTERNAL_HUB_URL`: An internal URL for a mercure hub. Will be used only when publishing updates to mercure, and does not need to be public. If this is not provided but `MERCURE_PUBLIC_HUB_URL` is, the public URL will be used to publish updates. * `MERCURE_INTERNAL_HUB_URL`: An internal URL for a mercure hub. Will be used only when publishing updates to mercure, and does not need to be public. If this is not provided but `MERCURE_PUBLIC_HUB_URL` is, the public URL will be used to publish updates.
* `MERCURE_JWT_SECRET`: The secret key that was provided to the mercure hub server, in order to be able to generate valid JWTs for publishing/subscribing to that server. * `MERCURE_JWT_SECRET`: The secret key that was provided to the mercure hub server, in order to be able to generate valid JWTs for publishing/subscribing to that server.
* `ANONYMIZE_REMOTE_ADDR`: Tells if IP addresses from visitors should be obfuscated before storing them in the database. Default value is `true`. **Careful!** Setting this to `false` will make your Shlink instance no longer be in compliance with the GDPR and other similar data protection regulations.
* `GEOLITE_LICENSE_KEY`: The license key used to download new GeoLite2 database files. This is not mandatory, as a default license key is provided, but it is **strongly recommended** that you provide your own. Go to [https://shlink.io/documentation/geolite-license-key](https://shlink.io/documentation/geolite-license-key) to know how to generate it.
An example using all env vars could look like this: An example using all env vars could look like this:
@ -205,6 +205,7 @@ docker run \
-e "MERCURE_PUBLIC_HUB_URL=https://example.com" \ -e "MERCURE_PUBLIC_HUB_URL=https://example.com" \
-e "MERCURE_INTERNAL_HUB_URL=http://my-mercure-hub.prod.svc.cluster.local" \ -e "MERCURE_INTERNAL_HUB_URL=http://my-mercure-hub.prod.svc.cluster.local" \
-e MERCURE_JWT_SECRET=super_secret_key \ -e MERCURE_JWT_SECRET=super_secret_key \
-e ANONYMIZE_REMOTE_ADDR=false \
shlinkio/shlink:stable shlinkio/shlink:stable
``` ```
@ -249,7 +250,8 @@ The whole configuration should have this format, but it can be split into multip
"geolite_license_key": "kjh23ljkbndskj345", "geolite_license_key": "kjh23ljkbndskj345",
"mercure_public_hub_url": "https://example.com", "mercure_public_hub_url": "https://example.com",
"mercure_internal_hub_url": "http://my-mercure-hub.prod.svc.cluster.local", "mercure_internal_hub_url": "http://my-mercure-hub.prod.svc.cluster.local",
"mercure_jwt_secret": "super_secret_key" "mercure_jwt_secret": "super_secret_key",
"anonymize_remote_addr": false
} }
``` ```

View file

@ -117,6 +117,7 @@ return [
'hostname' => env('SHORT_DOMAIN_HOST', ''), 'hostname' => env('SHORT_DOMAIN_HOST', ''),
], ],
'validate_url' => (bool) env('VALIDATE_URLS', false), 'validate_url' => (bool) env('VALIDATE_URLS', false),
'anonymize_remote_addr' => (bool) env('ANONYMIZE_REMOTE_ADDR', true),
'visits_webhooks' => $helper->getVisitsWebhooks(), 'visits_webhooks' => $helper->getVisitsWebhooks(),
'default_short_codes_length' => $helper->getDefaultShortCodesLength(), 'default_short_codes_length' => $helper->getDefaultShortCodesLength(),
], ],

View file

@ -54,7 +54,11 @@ return [
Options\UrlShortenerOptions::class => ['config.url_shortener'], Options\UrlShortenerOptions::class => ['config.url_shortener'],
Service\UrlShortener::class => [Util\UrlValidator::class, 'em', Resolver\PersistenceDomainResolver::class], Service\UrlShortener::class => [Util\UrlValidator::class, 'em', Resolver\PersistenceDomainResolver::class],
Service\VisitsTracker::class => ['em', EventDispatcherInterface::class], Service\VisitsTracker::class => [
'em',
EventDispatcherInterface::class,
'config.url_shortener.anonymize_remote_addr',
],
Service\ShortUrlService::class => ['em', Service\ShortUrl\ShortUrlResolver::class, Util\UrlValidator::class], Service\ShortUrlService::class => ['em', Service\ShortUrl\ShortUrlResolver::class, Util\UrlValidator::class],
Visit\VisitLocator::class => ['em'], Visit\VisitLocator::class => ['em'],
Visit\VisitsStatsHelper::class => ['em'], Visit\VisitsStatsHelper::class => ['em'],

View file

@ -37,6 +37,7 @@ class SimplifiedConfigParser
'mercure_public_hub_url' => ['mercure', 'public_hub_url'], 'mercure_public_hub_url' => ['mercure', 'public_hub_url'],
'mercure_internal_hub_url' => ['mercure', 'internal_hub_url'], 'mercure_internal_hub_url' => ['mercure', 'internal_hub_url'],
'mercure_jwt_secret' => ['mercure', 'jwt_secret'], 'mercure_jwt_secret' => ['mercure', 'jwt_secret'],
'anonymize_remote_addr' => ['url_shortener', 'anonymize_remote_addr'],
]; ];
private const SIMPLIFIED_CONFIG_SIDE_EFFECTS = [ private const SIMPLIFIED_CONFIG_SIDE_EFFECTS = [
'delete_short_url_threshold' => [ 'delete_short_url_threshold' => [

View file

@ -21,19 +21,19 @@ class Visit extends AbstractEntity implements JsonSerializable
private ShortUrl $shortUrl; private ShortUrl $shortUrl;
private ?VisitLocation $visitLocation = null; private ?VisitLocation $visitLocation = null;
public function __construct(ShortUrl $shortUrl, Visitor $visitor, ?Chronos $date = null) public function __construct(ShortUrl $shortUrl, Visitor $visitor, bool $anonymize = true, ?Chronos $date = null)
{ {
$this->shortUrl = $shortUrl; $this->shortUrl = $shortUrl;
$this->date = $date ?? Chronos::now(); $this->date = $date ?? Chronos::now();
$this->userAgent = $visitor->getUserAgent(); $this->userAgent = $visitor->getUserAgent();
$this->referer = $visitor->getReferer(); $this->referer = $visitor->getReferer();
$this->remoteAddr = $this->obfuscateAddress($visitor->getRemoteAddress()); $this->remoteAddr = $this->processAddress($anonymize, $visitor->getRemoteAddress());
} }
private function obfuscateAddress(?string $address): ?string private function processAddress(bool $anonymize, ?string $address): ?string
{ {
// Localhost addresses do not need to be obfuscated // Localhost addresses do not need to be anonymized
if ($address === null || $address === IpAddress::LOCALHOST) { if (! $anonymize || $address === null || $address === IpAddress::LOCALHOST) {
return $address; return $address;
} }

View file

@ -22,11 +22,16 @@ class VisitsTracker implements VisitsTrackerInterface
{ {
private ORM\EntityManagerInterface $em; private ORM\EntityManagerInterface $em;
private EventDispatcherInterface $eventDispatcher; private EventDispatcherInterface $eventDispatcher;
private bool $anonymizeRemoteAddr;
public function __construct(ORM\EntityManagerInterface $em, EventDispatcherInterface $eventDispatcher) public function __construct(
{ ORM\EntityManagerInterface $em,
EventDispatcherInterface $eventDispatcher,
bool $anonymizeRemoteAddr
) {
$this->em = $em; $this->em = $em;
$this->eventDispatcher = $eventDispatcher; $this->eventDispatcher = $eventDispatcher;
$this->anonymizeRemoteAddr = $anonymizeRemoteAddr;
} }
/** /**
@ -34,7 +39,7 @@ class VisitsTracker implements VisitsTrackerInterface
*/ */
public function track(ShortUrl $shortUrl, Visitor $visitor): void public function track(ShortUrl $shortUrl, Visitor $visitor): void
{ {
$visit = new Visit($shortUrl, $visitor); $visit = new Visit($shortUrl, $visitor, $this->anonymizeRemoteAddr);
$this->em->persist($visit); $this->em->persist($visit);
$this->em->flush(); $this->em->flush();

View file

@ -139,13 +139,19 @@ class VisitRepositoryTest extends DatabaseTestCase
$this->getEntityManager()->persist($shortUrlWithDomain); $this->getEntityManager()->persist($shortUrlWithDomain);
for ($i = 0; $i < 6; $i++) { for ($i = 0; $i < 6; $i++) {
$visit = new Visit($shortUrl, Visitor::emptyInstance(), Chronos::parse(sprintf('2016-01-0%s', $i + 1))); $visit = new Visit(
$shortUrl,
Visitor::emptyInstance(),
true,
Chronos::parse(sprintf('2016-01-0%s', $i + 1)),
);
$this->getEntityManager()->persist($visit); $this->getEntityManager()->persist($visit);
} }
for ($i = 0; $i < 3; $i++) { for ($i = 0; $i < 3; $i++) {
$visit = new Visit( $visit = new Visit(
$shortUrlWithDomain, $shortUrlWithDomain,
Visitor::emptyInstance(), Visitor::emptyInstance(),
true,
Chronos::parse(sprintf('2016-01-0%s', $i + 1)), Chronos::parse(sprintf('2016-01-0%s', $i + 1)),
); );
$this->getEntityManager()->persist($visit); $this->getEntityManager()->persist($visit);

View file

@ -64,6 +64,7 @@ class SimplifiedConfigParserTest extends TestCase
'mercure_public_hub_url' => 'public_url', 'mercure_public_hub_url' => 'public_url',
'mercure_internal_hub_url' => 'internal_url', 'mercure_internal_hub_url' => 'internal_url',
'mercure_jwt_secret' => 'super_secret_value', 'mercure_jwt_secret' => 'super_secret_value',
'anonymize_remote_addr' => false,
]; ];
$expected = [ $expected = [
'app_options' => [ 'app_options' => [
@ -92,6 +93,7 @@ class SimplifiedConfigParserTest extends TestCase
'https://third-party.io/foo', 'https://third-party.io/foo',
], ],
'default_short_codes_length' => 8, 'default_short_codes_length' => 8,
'anonymize_remote_addr' => false,
], ],
'delete_short_urls' => [ 'delete_short_urls' => [

View file

@ -6,6 +6,7 @@ namespace ShlinkioTest\Shlink\Core\Entity;
use Cake\Chronos\Chronos; use Cake\Chronos\Chronos;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
use Shlinkio\Shlink\Common\Util\IpAddress;
use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Entity\ShortUrl;
use Shlinkio\Shlink\Core\Entity\Visit; use Shlinkio\Shlink\Core\Entity\Visit;
use Shlinkio\Shlink\Core\Model\Visitor; use Shlinkio\Shlink\Core\Model\Visitor;
@ -18,7 +19,7 @@ class VisitTest extends TestCase
*/ */
public function isProperlyJsonSerialized(?Chronos $date): void public function isProperlyJsonSerialized(?Chronos $date): void
{ {
$visit = new Visit(new ShortUrl(''), new Visitor('Chrome', 'some site', '1.2.3.4'), $date); $visit = new Visit(new ShortUrl(''), new Visitor('Chrome', 'some site', '1.2.3.4'), true, $date);
$this->assertEquals([ $this->assertEquals([
'referer' => 'some site', 'referer' => 'some site',
@ -33,4 +34,25 @@ class VisitTest extends TestCase
yield 'null date' => [null]; yield 'null date' => [null];
yield 'not null date' => [Chronos::now()->subDays(10)]; yield 'not null date' => [Chronos::now()->subDays(10)];
} }
/**
 * Checks that the remote address kept by a Visit matches the expected one for the given `$anonymize` flag.
 *
 * @test
 * @dataProvider provideAddresses
 */
public function addressIsAnonymizedWhenRequested(bool $anonymize, ?string $address, ?string $expectedAddress): void
{
    $visit = new Visit(new ShortUrl(''), new Visitor('Chrome', 'some site', $address), $anonymize);

    // Strict comparison on purpose: with assertEquals, a null address and an empty string would be
    // considered equal, hiding a regression in how missing addresses are handled.
    $this->assertSame($expectedAddress, $visit->getRemoteAddr());
}
/**
 * Data sets for addressIsAnonymizedWhenRequested.
 *
 * Every set is [anonymize flag, address provided by the visitor, address expected on the visit].
 */
public function provideAddresses(): iterable
{
    $localhost = IpAddress::LOCALHOST;

    yield from [
        'anonymized null address' => [true, null, null],
        'non-anonymized null address' => [false, null, null],
        'anonymized localhost' => [true, $localhost, $localhost],
        'non-anonymized localhost' => [false, $localhost, $localhost],
        'anonymized regular address' => [true, '1.2.3.4', '1.2.3.0'],
        'non-anonymized regular address' => [false, '1.2.3.4', '1.2.3.4'],
    ];
}
} }

View file

@ -6,7 +6,6 @@ namespace ShlinkioTest\Shlink\Core\Service;
use Doctrine\ORM\EntityManager; use Doctrine\ORM\EntityManager;
use Laminas\Stdlib\ArrayUtils; use Laminas\Stdlib\ArrayUtils;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
use Prophecy\Argument; use Prophecy\Argument;
use Prophecy\Prophecy\ObjectProphecy; use Prophecy\Prophecy\ObjectProphecy;
@ -37,7 +36,7 @@ class VisitsTrackerTest extends TestCase
$this->em = $this->prophesize(EntityManager::class); $this->em = $this->prophesize(EntityManager::class);
$this->eventDispatcher = $this->prophesize(EventDispatcherInterface::class); $this->eventDispatcher = $this->prophesize(EventDispatcherInterface::class);
$this->visitsTracker = new VisitsTracker($this->em->reveal(), $this->eventDispatcher->reveal()); $this->visitsTracker = new VisitsTracker($this->em->reveal(), $this->eventDispatcher->reveal(), true);
} }
/** @test */ /** @test */
@ -53,25 +52,6 @@ class VisitsTrackerTest extends TestCase
$this->eventDispatcher->dispatch(Argument::type(ShortUrlVisited::class))->shouldHaveBeenCalled(); $this->eventDispatcher->dispatch(Argument::type(ShortUrlVisited::class))->shouldHaveBeenCalled();
} }
/** @test */
public function trackedIpAddressGetsObfuscated(): void
{
$shortCode = '123ABC';
$this->em->persist(Argument::any())->will(function ($args) {
/** @var Visit $visit */
$visit = $args[0];
Assert::assertEquals('4.3.2.0', $visit->getRemoteAddr());
$visit->setId('1');
return $visit;
})->shouldBeCalledOnce();
$this->em->flush()->shouldBeCalledOnce();
$this->visitsTracker->track(new ShortUrl($shortCode), new Visitor('', '', '4.3.2.1'));
$this->eventDispatcher->dispatch(Argument::type(ShortUrlVisited::class))->shouldHaveBeenCalled();
}
/** @test */ /** @test */
public function infoReturnsVisitsForCertainShortCode(): void public function infoReturnsVisitsForCertainShortCode(): void
{ {