Improved how visits with some conditions are fetched from the database, so all internal logic is 100% transparent regardless the purpose

This commit is contained in:
Alejandro Celaya 2020-03-27 22:01:26 +01:00
parent f730c24ecb
commit 43a3d469e7
5 changed files with 49 additions and 71 deletions

View file

@ -5,79 +5,61 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Repository;
use Doctrine\ORM\EntityRepository;
use Doctrine\ORM\Query;
use Doctrine\ORM\QueryBuilder;
use Shlinkio\Shlink\Common\Util\DateRange;
use Shlinkio\Shlink\Core\Entity\Visit;
class VisitRepository extends EntityRepository implements VisitRepositoryInterface
{
private const DEFAULT_BLOCK_SIZE = 10000;
/**
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
* smaller blocks of a specific size.
* This will have side effects if you update those rows while you iterate them, in a way that they are no longer
* unlocated.
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
* dataset.
*
* @return iterable|Visit[]
*/
public function findUnlocatedVisits(bool $applyOffset = true): iterable
public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable
{
$dql = <<<DQL
SELECT v FROM Shlinkio\Shlink\Core\Entity\Visit AS v WHERE v.visitLocation IS NULL
DQL;
$query = $this->getEntityManager()->createQuery($dql);
$remainingVisitsToProcess = $this->count(['visitLocation' => null]);
$qb = $this->getEntityManager()->createQueryBuilder();
$qb->select('v')
->from(Visit::class, 'v')
->where($qb->expr()->isNull('v.visitLocation'));
return $this->findVisitsForQuery($query, $remainingVisitsToProcess, $applyOffset);
return $this->findVisitsForQuery($qb, $blockSize);
}
/**
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
* smaller blocks of a specific size.
* This will have side effects if you update those rows while you iterate them, in a way that they are no longer
* unlocated.
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
* dataset.
*
* @return iterable|Visit[]
*/
public function findVisitsWithEmptyLocation(bool $applyOffset = true): iterable
public function findVisitsWithEmptyLocation(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable
{
$qb = $this->getEntityManager()->createQueryBuilder();
$qb->from(Visit::class, 'v')
$qb->select('v')
->from(Visit::class, 'v')
->join('v.visitLocation', 'vl')
->where($qb->expr()->isNotNull('v.visitLocation'))
->andWhere($qb->expr()->eq('vl.isEmpty', ':isEmpty'))
->setParameter('isEmpty', true);
$countQb = clone $qb;
$query = $qb->select('v')->getQuery();
$remainingVisitsToProcess = (int) $countQb->select('COUNT(DISTINCT v.id)')->getQuery()->getSingleScalarResult();
return $this->findVisitsForQuery($query, $remainingVisitsToProcess, $applyOffset);
return $this->findVisitsForQuery($qb, $blockSize);
}
private function findVisitsForQuery(Query $query, int $remainingVisitsToProcess, bool $applyOffset = true): iterable
private function findVisitsForQuery(QueryBuilder $qb, int $blockSize): iterable
{
$blockSize = self::DEFAULT_BLOCK_SIZE;
$query = $query->setMaxResults($blockSize);
$offset = 0;
$originalQueryBuilder = $qb->setMaxResults($blockSize)
->orderBy('v.id', 'ASC');
$lastId = '0';
// FIXME Do not use the $applyOffset workaround. Instead, always start with first result, but skip already
// processed results. That should work both if any entry is edited or not
while ($remainingVisitsToProcess > 0) {
$iterator = $query->setFirstResult($applyOffset ? $offset : null)->iterate();
foreach ($iterator as $key => [$value]) {
yield $key => $value;
do {
$qb = (clone $originalQueryBuilder)->andWhere($qb->expr()->gt('v.id', $lastId));
$iterator = $qb->getQuery()->iterate();
$resultsFound = false;
/** @var Visit $visit */
foreach ($iterator as $key => [$visit]) {
$resultsFound = true;
yield $key => $visit;
}
$remainingVisitsToProcess -= $blockSize;
$offset += $blockSize;
}
// As the query is ordered by ID, we can take the last one every time in order to exclude the whole list
$lastId = isset($visit) ? $visit->getId() : $lastId;
} while ($resultsFound);
}
/**

View file

@ -10,29 +10,17 @@ use Shlinkio\Shlink\Core\Entity\Visit;
interface VisitRepositoryInterface extends ObjectRepository
{
/**
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
* smaller blocks of a specific size.
* This will have side effects if you update those rows while you iterate them, in a way that they are no longer
* unlocated.
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
* dataset.
*
* @return iterable|Visit[]
*/
public function findUnlocatedVisits(bool $applyOffset = true): iterable;
public const DEFAULT_BLOCK_SIZE = 10000;
/**
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
* smaller blocks of a specific size.
* This will have side effects if you update those rows while you iterate them, in a way that they are no longer
* unlocated.
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
* dataset.
*
* @return iterable|Visit[]
*/
public function findVisitsWithEmptyLocation(bool $applyOffset = true): iterable;
public function findUnlocatedVisits(int $defaultBlockSize = self::DEFAULT_BLOCK_SIZE): iterable;
/**
* @return iterable|Visit[]
*/
public function findVisitsWithEmptyLocation(int $defaultBlockSize = self::DEFAULT_BLOCK_SIZE): iterable;
/**
* @return Visit[]

View file

@ -24,14 +24,14 @@ class VisitLocator implements VisitLocatorInterface
{
/** @var VisitRepository $repo */
$repo = $this->em->getRepository(Visit::class);
$this->locateVisits($repo->findUnlocatedVisits(false), $geolocateVisit, $notifyVisitWithLocation);
$this->locateVisits($repo->findUnlocatedVisits(), $geolocateVisit, $notifyVisitWithLocation);
}
public function locateVisitsWithEmptyLocation(callable $geolocateVisit, callable $notifyVisitWithLocation): void
{
/** @var VisitRepository $repo */
$repo = $this->em->getRepository(Visit::class);
$this->locateVisits($repo->findVisitsWithEmptyLocation(false), $geolocateVisit, $notifyVisitWithLocation);
$this->locateVisits($repo->findVisitsWithEmptyLocation(), $geolocateVisit, $notifyVisitWithLocation);
}
/**

View file

@ -36,8 +36,11 @@ class VisitRepositoryTest extends DatabaseTestCase
$this->repo = $this->getEntityManager()->getRepository(Visit::class);
}
/** @test */
public function findVisitsReturnsProperVisits(): void
/**
* @test
* @dataProvider provideBlockSize
*/
public function findVisitsReturnsProperVisits(int $blockSize): void
{
$shortUrl = new ShortUrl('');
$this->getEntityManager()->persist($shortUrl);
@ -63,8 +66,8 @@ class VisitRepositoryTest extends DatabaseTestCase
}
$this->getEntityManager()->flush();
$withEmptyLocation = $this->repo->findVisitsWithEmptyLocation();
$unlocated = $this->repo->findUnlocatedVisits();
$withEmptyLocation = $this->repo->findVisitsWithEmptyLocation($blockSize);
$unlocated = $this->repo->findUnlocatedVisits($blockSize);
// Important! assertCount will not work here, as this iterable object loads data dynamically and counts to
// 0 if not iterated
@ -72,6 +75,11 @@ class VisitRepositoryTest extends DatabaseTestCase
$this->assertEquals(4, $countIterable($withEmptyLocation));
}
public function provideBlockSize(): iterable
{
return map(range(1, 10), fn (int $value) => [$value]);
}
/** @test */
public function findVisitsByShortCodeReturnsProperData(): void
{

View file

@ -46,7 +46,7 @@ class VisitLocatorTest extends TestCase
);
$repo = $this->prophesize(VisitRepository::class);
$findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits);
$findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits);
$getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal());
$persist = $this->em->persist(Argument::type(Visit::class))->will(function (): void {
@ -81,7 +81,7 @@ class VisitLocatorTest extends TestCase
];
$repo = $this->prophesize(VisitRepository::class);
$findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits);
$findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits);
$getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal());
$persist = $this->em->persist(Argument::type(Visit::class))->will(function (): void {