From d2fad0128fb5ea338e078c6b3878189c1edde654 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sat, 23 Feb 2019 09:58:02 +0100 Subject: [PATCH] Fixed bug missing unprocessed visits while iterating and updating, while drastically improving the performance --- .../Core/src/Repository/VisitRepository.php | 16 +++++++----- .../Repository/VisitRepositoryInterface.php | 8 +++++- module/Core/src/Service/VisitService.php | 18 +++++++++---- .../Repository/VisitRepositoryTest.php | 2 +- module/Core/test/Service/VisitServiceTest.php | 25 +++++++++++-------- 5 files changed, 45 insertions(+), 24 deletions(-) diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index 9c65877c..06c7f252 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -11,22 +11,26 @@ use Shlinkio\Shlink\Core\Entity\Visit; class VisitRepository extends EntityRepository implements VisitRepositoryInterface { /** + * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in + * smaller blocks of a specific size. + * This will have side effects if you update those rows while you iterate them. + * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the + * dataset + * * @return iterable|Visit[] */ - public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable + public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable { $dql = <<getEntityManager()->createQuery($dql); + $query = $this->getEntityManager()->createQuery($dql) + ->setMaxResults($blockSize); $remainingVisitsToProcess = $this->count(['visitLocation' => null]); $offset = 0; while ($remainingVisitsToProcess > 0) { - $iterator = $query->setMaxResults($blockSize) - ->setFirstResult($offset) - ->iterate(); - + $iterator = $query->setFirstResult($applyOffset ? $offset : null)->iterate(); foreach ($iterator as $key => [$value]) { yield $key => $value; } diff --git a/module/Core/src/Repository/VisitRepositoryInterface.php b/module/Core/src/Repository/VisitRepositoryInterface.php index 2307946a..585bb3ef 100644 --- a/module/Core/src/Repository/VisitRepositoryInterface.php +++ b/module/Core/src/Repository/VisitRepositoryInterface.php @@ -12,9 +12,15 @@ interface VisitRepositoryInterface extends ObjectRepository public const DEFAULT_BLOCK_SIZE = 10000; /** + * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in + * smaller blocks of a specific size. + * This will have side effects if you update those rows while you iterate them. + * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the + * dataset + * * @return iterable|Visit[] */ - public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable; + public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable; /** * @return Visit[] diff --git a/module/Core/src/Service/VisitService.php b/module/Core/src/Service/VisitService.php index 78c0eddd..4abbf4a2 100644 --- a/module/Core/src/Service/VisitService.php +++ b/module/Core/src/Service/VisitService.php @@ -24,9 +24,12 @@ class VisitService implements VisitServiceInterface { /** @var VisitRepository $repo */ $repo = $this->em->getRepository(Visit::class); - $results = $repo->findUnlocatedVisits(); + $results = $repo->findUnlocatedVisits(false); + $count = 0; + $persistBlock = 200; foreach ($results as $visit) { + $count++; try { /** @var Location $location */ $location = $geolocateVisit($visit); @@ -37,20 +40,25 @@ class VisitService implements VisitServiceInterface $location = new VisitLocation($location); $this->locateVisit($visit, $location, $notifyVisitWithLocation); + + // Flush and clear after X iterations + if ($count % $persistBlock === 0) { + $this->em->flush(); + $this->em->clear(); + } } + + $this->em->flush(); + $this->em->clear(); } private function locateVisit(Visit $visit, VisitLocation $location, ?callable $notifyVisitWithLocation): void { $visit->locate($location); - $this->em->persist($visit); - $this->em->flush(); if ($notifyVisitWithLocation !== null) { $notifyVisitWithLocation($location, $visit); } - - $this->em->clear(); } } diff --git a/module/Core/test-db/Repository/VisitRepositoryTest.php b/module/Core/test-db/Repository/VisitRepositoryTest.php index 64702fcd..46bda817 100644 --- a/module/Core/test-db/Repository/VisitRepositoryTest.php +++ b/module/Core/test-db/Repository/VisitRepositoryTest.php @@ -55,7 +55,7 @@ class VisitRepositoryTest extends DatabaseTestCase $this->getEntityManager()->flush(); $resultsCount = 0; - $results = $this->repo->findUnlocatedVisits($blockSize); + $results = $this->repo->findUnlocatedVisits(true, $blockSize); foreach ($results as $value) { $resultsCount++; } diff --git a/module/Core/test/Service/VisitServiceTest.php b/module/Core/test/Service/VisitServiceTest.php index 4f249b23..be2bf8ba 100644 --- a/module/Core/test/Service/VisitServiceTest.php +++ b/module/Core/test/Service/VisitServiceTest.php @@ -17,7 +17,11 @@ use Shlinkio\Shlink\Core\Repository\VisitRepository; use Shlinkio\Shlink\Core\Service\VisitService; use function array_shift; use function count; +use function floor; use function func_get_args; +use function Functional\map; +use function range; +use function sprintf; class VisitServiceTest extends TestCase { @@ -35,13 +39,12 @@ class VisitServiceTest extends TestCase /** @test */ public function locateVisitsIteratesAndLocatesUnlocatedVisits(): void { - $unlocatedVisits = [ - new Visit(new ShortUrl('foo'), Visitor::emptyInstance()), - new Visit(new ShortUrl('bar'), Visitor::emptyInstance()), - ]; + $unlocatedVisits = map(range(1, 200), function (int $i) { + return new Visit(new ShortUrl(sprintf('short_code_%s', $i)), Visitor::emptyInstance()); + }); $repo = $this->prophesize(VisitRepository::class); - $findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits); + $findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $persist = $this->em->persist(Argument::type(Visit::class))->will(function () { @@ -63,19 +66,19 @@ class VisitServiceTest extends TestCase $findUnlocatedVisits->shouldHaveBeenCalledOnce(); $getRepo->shouldHaveBeenCalledOnce(); $persist->shouldHaveBeenCalledTimes(count($unlocatedVisits)); - $flush->shouldHaveBeenCalledTimes(count($unlocatedVisits)); - $clear->shouldHaveBeenCalledTimes(count($unlocatedVisits)); + $flush->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1); + $clear->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1); } /** @test */ - public function visitsWhichCannotBeLocatedAreIgnored() + public function visitsWhichCannotBeLocatedAreIgnored(): void { $unlocatedVisits = [ new Visit(new ShortUrl('foo'), Visitor::emptyInstance()), ]; $repo = $this->prophesize(VisitRepository::class); - $findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits); + $findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $persist = $this->em->persist(Argument::type(Visit::class))->will(function () { @@ -92,7 +95,7 @@ class VisitServiceTest extends TestCase $findUnlocatedVisits->shouldHaveBeenCalledOnce(); $getRepo->shouldHaveBeenCalledOnce(); $persist->shouldNotHaveBeenCalled(); - $flush->shouldNotHaveBeenCalled(); - $clear->shouldNotHaveBeenCalled(); + $flush->shouldHaveBeenCalledOnce(); + $clear->shouldHaveBeenCalledOnce(); } }