Fixed bug missing unprocessed visits while iterating and updating, while drastically improving the performance

This commit is contained in:
Alejandro Celaya 2019-02-23 09:58:02 +01:00
parent 62133c994f
commit d2fad0128f
5 changed files with 45 additions and 24 deletions

View file

@ -11,22 +11,26 @@ use Shlinkio\Shlink\Core\Entity\Visit;
class VisitRepository extends EntityRepository implements VisitRepositoryInterface
{
/**
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
* smaller blocks of a specific size.
* This will have side effects if you update those rows while you iterate them.
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
* dataset
*
* @return iterable|Visit[]
*/
public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable
public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable
{
$dql = <<<DQL
SELECT v FROM Shlinkio\Shlink\Core\Entity\Visit AS v WHERE v.visitLocation IS NULL
DQL;
$query = $this->getEntityManager()->createQuery($dql);
$query = $this->getEntityManager()->createQuery($dql)
->setMaxResults($blockSize);
$remainingVisitsToProcess = $this->count(['visitLocation' => null]);
$offset = 0;
while ($remainingVisitsToProcess > 0) {
$iterator = $query->setMaxResults($blockSize)
->setFirstResult($offset)
->iterate();
$iterator = $query->setFirstResult($applyOffset ? $offset : null)->iterate();
foreach ($iterator as $key => [$value]) {
yield $key => $value;
}

View file

@ -12,9 +12,15 @@ interface VisitRepositoryInterface extends ObjectRepository
public const DEFAULT_BLOCK_SIZE = 10000;
/**
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
* smaller blocks of a specific size.
* This will have side effects if you update those rows while you iterate them.
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
* dataset
*
* @return iterable|Visit[]
*/
public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable;
public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable;
/**
* @return Visit[]

View file

@ -24,9 +24,12 @@ class VisitService implements VisitServiceInterface
{
/** @var VisitRepository $repo */
$repo = $this->em->getRepository(Visit::class);
$results = $repo->findUnlocatedVisits();
$results = $repo->findUnlocatedVisits(false);
$count = 0;
$persistBlock = 200;
foreach ($results as $visit) {
$count++;
try {
/** @var Location $location */
$location = $geolocateVisit($visit);
@ -37,20 +40,25 @@ class VisitService implements VisitServiceInterface
$location = new VisitLocation($location);
$this->locateVisit($visit, $location, $notifyVisitWithLocation);
// Flush and clear after X iterations
if ($count % $persistBlock === 0) {
$this->em->flush();
$this->em->clear();
}
}
$this->em->flush();
$this->em->clear();
}
private function locateVisit(Visit $visit, VisitLocation $location, ?callable $notifyVisitWithLocation): void
{
$visit->locate($location);
$this->em->persist($visit);
$this->em->flush();
if ($notifyVisitWithLocation !== null) {
$notifyVisitWithLocation($location, $visit);
}
$this->em->clear();
}
}

View file

@ -55,7 +55,7 @@ class VisitRepositoryTest extends DatabaseTestCase
$this->getEntityManager()->flush();
$resultsCount = 0;
$results = $this->repo->findUnlocatedVisits($blockSize);
$results = $this->repo->findUnlocatedVisits(true, $blockSize);
foreach ($results as $value) {
$resultsCount++;
}

View file

@ -17,7 +17,11 @@ use Shlinkio\Shlink\Core\Repository\VisitRepository;
use Shlinkio\Shlink\Core\Service\VisitService;
use function array_shift;
use function count;
use function floor;
use function func_get_args;
use function Functional\map;
use function range;
use function sprintf;
class VisitServiceTest extends TestCase
{
@ -35,13 +39,12 @@ class VisitServiceTest extends TestCase
/** @test */
public function locateVisitsIteratesAndLocatesUnlocatedVisits(): void
{
$unlocatedVisits = [
new Visit(new ShortUrl('foo'), Visitor::emptyInstance()),
new Visit(new ShortUrl('bar'), Visitor::emptyInstance()),
];
$unlocatedVisits = map(range(1, 200), function (int $i) {
return new Visit(new ShortUrl(sprintf('short_code_%s', $i)), Visitor::emptyInstance());
});
$repo = $this->prophesize(VisitRepository::class);
$findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits);
$findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits);
$getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal());
$persist = $this->em->persist(Argument::type(Visit::class))->will(function () {
@ -63,19 +66,19 @@ class VisitServiceTest extends TestCase
$findUnlocatedVisits->shouldHaveBeenCalledOnce();
$getRepo->shouldHaveBeenCalledOnce();
$persist->shouldHaveBeenCalledTimes(count($unlocatedVisits));
$flush->shouldHaveBeenCalledTimes(count($unlocatedVisits));
$clear->shouldHaveBeenCalledTimes(count($unlocatedVisits));
$flush->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1);
$clear->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1);
}
/** @test */
public function visitsWhichCannotBeLocatedAreIgnored()
public function visitsWhichCannotBeLocatedAreIgnored(): void
{
$unlocatedVisits = [
new Visit(new ShortUrl('foo'), Visitor::emptyInstance()),
];
$repo = $this->prophesize(VisitRepository::class);
$findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits);
$findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits);
$getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal());
$persist = $this->em->persist(Argument::type(Visit::class))->will(function () {
@ -92,7 +95,7 @@ class VisitServiceTest extends TestCase
$findUnlocatedVisits->shouldHaveBeenCalledOnce();
$getRepo->shouldHaveBeenCalledOnce();
$persist->shouldNotHaveBeenCalled();
$flush->shouldNotHaveBeenCalled();
$clear->shouldNotHaveBeenCalled();
$flush->shouldHaveBeenCalledOnce();
$clear->shouldHaveBeenCalledOnce();
}
}