Merge pull request #761 from acelaya-forks/feature/optional-obfuscation

Feature/optional obfuscation
This commit is contained in:
Alejandro Celaya 2020-05-08 16:03:11 +02:00 committed by GitHub
commit ae060f3b13
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 63 additions and 36 deletions

View file

@ -26,6 +26,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this
Now, if the `withStats=true` query param is provided, the response payload will include a new `stats` property which is a list with the amount of short URLs and visits for every tag.
* [#640](https://github.com/shlinkio/shlink/issues/640) Allowed to optionally disable visitors' IP address anonymization. This will make Shlink no longer be GDPR-compliant, but it's OK if you only plan to share your URLs in countries without this regulation.
#### Changed
* [#692](https://github.com/shlinkio/shlink/issues/692) Drastically improved performance when loading visits. Specially noticeable when loading big result sets.

View file

@ -51,7 +51,7 @@
"shlinkio/shlink-common": "dev-master#e659cf9d9b5b3b131419e2f55f2e595f562baafc as 3.1.0",
"shlinkio/shlink-config": "^1.0",
"shlinkio/shlink-event-dispatcher": "^1.4",
"shlinkio/shlink-installer": "dev-master#dae6644587d0c1c59ca773722531551b9f436786 as 5.0.0",
"shlinkio/shlink-installer": "dev-master#50be18de1e505d2609d96c6cc86571b1b1ca7b57 as 5.0.0",
"shlinkio/shlink-ip-geolocation": "^1.4",
"symfony/console": "^5.0",
"symfony/filesystem": "^5.0",

View file

@ -36,6 +36,7 @@ return [
Option\Mercure\MercureInternalUrlConfigOption::class,
Option\Mercure\MercureJwtSecretConfigOption::class,
Option\UrlShortener\GeoLiteLicenseKeyConfigOption::class,
Option\UrlShortener\IpAnonymizationConfigOption::class,
],
'installation_commands' => [

View file

@ -12,6 +12,7 @@ return [
'hostname' => '',
],
'validate_url' => false,
'anonymize_remote_addr' => true,
'visits_webhooks' => [],
'default_short_codes_length' => DEFAULT_SHORT_CODES_LENGTH,
],

View file

@ -168,12 +168,12 @@ This is the complete list of supported env vars:
* `TASK_WORKER_NUM`: The amount of concurrent background tasks this shlink instance will be able to execute. Defaults to 16.
* `VISITS_WEBHOOKS`: A comma-separated list of URLs that will receive a `POST` request when a short URL receives a visit.
* `DEFAULT_SHORT_CODES_LENGTH`: The length you want generated short codes to have. It defaults to 5 and has to be at least 4, so any value smaller than that will fall back to 4.
* `GEOLITE_LICENSE_KEY`: The license key used to download new GeoLite2 database files. This is not mandatory, as a default license key is provided, but it is **strongly recommended** that you provide your own. Go to [https://shlink.io/documentation/geolite-license-key](https://shlink.io/documentation/geolite-license-key) to know how to generate it.
* `REDIS_SERVERS`: A comma-separated list of redis servers where Shlink locks are stored (locks are used to prevent some operations to be run more than once in parallel).
* `MERCURE_PUBLIC_HUB_URL`: The public URL of a mercure hub server to which Shlink will sent updates. This URL will also be served to consumers that want to subscribe to those updates.
* `MERCURE_INTERNAL_HUB_URL`: An internal URL for a mercure hub. Will be used only when publishing updates to mercure, and does not need to be public. If this is not provided but `MERCURE_PUBLIC_HUB_URL` was, the former one will be used to publish updates.
* `MERCURE_JWT_SECRET`: The secret key that was provided to the mercure hub server, in order to be able to generate valid JWTs for publishing/subscribing to that server.
* `GEOLITE_LICENSE_KEY`: The license key used to download new GeoLite2 database files. This is not mandatory, as a default license key is provided, but it is **strongly recommended** that you provide your own. Go to [https://shlink.io/documentation/geolite-license-key](https://shlink.io/documentation/geolite-license-key) to know how to generate it.
* `ANONYMIZE_REMOTE_ADDR`: Tells if IP addresses from visitors should be obfuscated before storing them in the database. Default value is `true`. **Careful!** Setting this to `false` will make your Shlink instance no longer be in compliance with the GDPR and other similar data protection regulations.
An example using all env vars could look like this:
@ -205,6 +205,7 @@ docker run \
-e "MERCURE_PUBLIC_HUB_URL=https://example.com" \
-e "MERCURE_INTERNAL_HUB_URL=http://my-mercure-hub.prod.svc.cluster.local" \
-e MERCURE_JWT_SECRET=super_secret_key \
-e ANONYMIZE_REMOTE_ADDR=false \
shlinkio/shlink:stable
```
@ -249,7 +250,8 @@ The whole configuration should have this format, but it can be split into multip
"geolite_license_key": "kjh23ljkbndskj345",
"mercure_public_hub_url": "https://example.com",
"mercure_internal_hub_url": "http://my-mercure-hub.prod.svc.cluster.local",
"mercure_jwt_secret": "super_secret_key"
"mercure_jwt_secret": "super_secret_key",
"anonymize_remote_addr": false
}
```

View file

@ -117,6 +117,7 @@ return [
'hostname' => env('SHORT_DOMAIN_HOST', ''),
],
'validate_url' => (bool) env('VALIDATE_URLS', false),
'anonymize_remote_addr' => (bool) env('ANONYMIZE_REMOTE_ADDR', true),
'visits_webhooks' => $helper->getVisitsWebhooks(),
'default_short_codes_length' => $helper->getDefaultShortCodesLength(),
],

View file

@ -54,7 +54,11 @@ return [
Options\UrlShortenerOptions::class => ['config.url_shortener'],
Service\UrlShortener::class => [Util\UrlValidator::class, 'em', Resolver\PersistenceDomainResolver::class],
Service\VisitsTracker::class => ['em', EventDispatcherInterface::class],
Service\VisitsTracker::class => [
'em',
EventDispatcherInterface::class,
'config.url_shortener.anonymize_remote_addr',
],
Service\ShortUrlService::class => ['em', Service\ShortUrl\ShortUrlResolver::class, Util\UrlValidator::class],
Visit\VisitLocator::class => ['em'],
Visit\VisitsStatsHelper::class => ['em'],

View file

@ -37,6 +37,7 @@ class SimplifiedConfigParser
'mercure_public_hub_url' => ['mercure', 'public_hub_url'],
'mercure_internal_hub_url' => ['mercure', 'internal_hub_url'],
'mercure_jwt_secret' => ['mercure', 'jwt_secret'],
'anonymize_remote_addr' => ['url_shortener', 'anonymize_remote_addr'],
];
private const SIMPLIFIED_CONFIG_SIDE_EFFECTS = [
'delete_short_url_threshold' => [

View file

@ -21,19 +21,19 @@ class Visit extends AbstractEntity implements JsonSerializable
private ShortUrl $shortUrl;
private ?VisitLocation $visitLocation = null;
public function __construct(ShortUrl $shortUrl, Visitor $visitor, ?Chronos $date = null)
public function __construct(ShortUrl $shortUrl, Visitor $visitor, bool $anonymize = true, ?Chronos $date = null)
{
$this->shortUrl = $shortUrl;
$this->date = $date ?? Chronos::now();
$this->userAgent = $visitor->getUserAgent();
$this->referer = $visitor->getReferer();
$this->remoteAddr = $this->obfuscateAddress($visitor->getRemoteAddress());
$this->remoteAddr = $this->processAddress($anonymize, $visitor->getRemoteAddress());
}
private function obfuscateAddress(?string $address): ?string
private function processAddress(bool $anonymize, ?string $address): ?string
{
// Localhost addresses do not need to be obfuscated
if ($address === null || $address === IpAddress::LOCALHOST) {
// Localhost addresses do not need to be anonymized
if (! $anonymize || $address === null || $address === IpAddress::LOCALHOST) {
return $address;
}

View file

@ -22,11 +22,16 @@ class VisitsTracker implements VisitsTrackerInterface
{
private ORM\EntityManagerInterface $em;
private EventDispatcherInterface $eventDispatcher;
private bool $anonymizeRemoteAddr;
public function __construct(ORM\EntityManagerInterface $em, EventDispatcherInterface $eventDispatcher)
{
public function __construct(
ORM\EntityManagerInterface $em,
EventDispatcherInterface $eventDispatcher,
bool $anonymizeRemoteAddr
) {
$this->em = $em;
$this->eventDispatcher = $eventDispatcher;
$this->anonymizeRemoteAddr = $anonymizeRemoteAddr;
}
/**
@ -34,7 +39,7 @@ class VisitsTracker implements VisitsTrackerInterface
*/
public function track(ShortUrl $shortUrl, Visitor $visitor): void
{
$visit = new Visit($shortUrl, $visitor);
$visit = new Visit($shortUrl, $visitor, $this->anonymizeRemoteAddr);
$this->em->persist($visit);
$this->em->flush();

View file

@ -139,13 +139,19 @@ class VisitRepositoryTest extends DatabaseTestCase
$this->getEntityManager()->persist($shortUrlWithDomain);
for ($i = 0; $i < 6; $i++) {
$visit = new Visit($shortUrl, Visitor::emptyInstance(), Chronos::parse(sprintf('2016-01-0%s', $i + 1)));
$visit = new Visit(
$shortUrl,
Visitor::emptyInstance(),
true,
Chronos::parse(sprintf('2016-01-0%s', $i + 1)),
);
$this->getEntityManager()->persist($visit);
}
for ($i = 0; $i < 3; $i++) {
$visit = new Visit(
$shortUrlWithDomain,
Visitor::emptyInstance(),
true,
Chronos::parse(sprintf('2016-01-0%s', $i + 1)),
);
$this->getEntityManager()->persist($visit);

View file

@ -64,6 +64,7 @@ class SimplifiedConfigParserTest extends TestCase
'mercure_public_hub_url' => 'public_url',
'mercure_internal_hub_url' => 'internal_url',
'mercure_jwt_secret' => 'super_secret_value',
'anonymize_remote_addr' => false,
];
$expected = [
'app_options' => [
@ -92,6 +93,7 @@ class SimplifiedConfigParserTest extends TestCase
'https://third-party.io/foo',
],
'default_short_codes_length' => 8,
'anonymize_remote_addr' => false,
],
'delete_short_urls' => [

View file

@ -6,6 +6,7 @@ namespace ShlinkioTest\Shlink\Core\Entity;
use Cake\Chronos\Chronos;
use PHPUnit\Framework\TestCase;
use Shlinkio\Shlink\Common\Util\IpAddress;
use Shlinkio\Shlink\Core\Entity\ShortUrl;
use Shlinkio\Shlink\Core\Entity\Visit;
use Shlinkio\Shlink\Core\Model\Visitor;
@ -18,7 +19,7 @@ class VisitTest extends TestCase
*/
public function isProperlyJsonSerialized(?Chronos $date): void
{
$visit = new Visit(new ShortUrl(''), new Visitor('Chrome', 'some site', '1.2.3.4'), $date);
$visit = new Visit(new ShortUrl(''), new Visitor('Chrome', 'some site', '1.2.3.4'), true, $date);
$this->assertEquals([
'referer' => 'some site',
@ -33,4 +34,25 @@ class VisitTest extends TestCase
yield 'null date' => [null];
yield 'not null date' => [Chronos::now()->subDays(10)];
}
/**
* @test
* @dataProvider provideAddresses
*/
public function addressIsAnonymizedWhenRequested(bool $anonymize, ?string $address, ?string $expectedAddress): void
{
$visit = new Visit(new ShortUrl(''), new Visitor('Chrome', 'some site', $address), $anonymize);
$this->assertEquals($expectedAddress, $visit->getRemoteAddr());
}
public function provideAddresses(): iterable
{
yield 'anonymized null address' => [true, null, null];
yield 'non-anonymized null address' => [false, null, null];
yield 'anonymized localhost' => [true, IpAddress::LOCALHOST, IpAddress::LOCALHOST];
yield 'non-anonymized localhost' => [false, IpAddress::LOCALHOST, IpAddress::LOCALHOST];
yield 'anonymized regular address' => [true, '1.2.3.4', '1.2.3.0'];
yield 'non-anonymized regular address' => [false, '1.2.3.4', '1.2.3.4'];
}
}

View file

@ -6,7 +6,6 @@ namespace ShlinkioTest\Shlink\Core\Service;
use Doctrine\ORM\EntityManager;
use Laminas\Stdlib\ArrayUtils;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\TestCase;
use Prophecy\Argument;
use Prophecy\Prophecy\ObjectProphecy;
@ -37,7 +36,7 @@ class VisitsTrackerTest extends TestCase
$this->em = $this->prophesize(EntityManager::class);
$this->eventDispatcher = $this->prophesize(EventDispatcherInterface::class);
$this->visitsTracker = new VisitsTracker($this->em->reveal(), $this->eventDispatcher->reveal());
$this->visitsTracker = new VisitsTracker($this->em->reveal(), $this->eventDispatcher->reveal(), true);
}
/** @test */
@ -53,25 +52,6 @@ class VisitsTrackerTest extends TestCase
$this->eventDispatcher->dispatch(Argument::type(ShortUrlVisited::class))->shouldHaveBeenCalled();
}
/** @test */
public function trackedIpAddressGetsObfuscated(): void
{
$shortCode = '123ABC';
$this->em->persist(Argument::any())->will(function ($args) {
/** @var Visit $visit */
$visit = $args[0];
Assert::assertEquals('4.3.2.0', $visit->getRemoteAddr());
$visit->setId('1');
return $visit;
})->shouldBeCalledOnce();
$this->em->flush()->shouldBeCalledOnce();
$this->visitsTracker->track(new ShortUrl($shortCode), new Visitor('', '', '4.3.2.1'));
$this->eventDispatcher->dispatch(Argument::type(ShortUrlVisited::class))->shouldHaveBeenCalled();
}
/** @test */
public function infoReturnsVisitsForCertainShortCode(): void
{