mirror of
https://github.com/shlinkio/shlink.git
synced 2024-11-24 05:38:06 +03:00
Fixed bug missing unprocessed visits while iterating and updating, while drastically improving the performance
This commit is contained in:
parent
62133c994f
commit
d2fad0128f
5 changed files with 45 additions and 24 deletions
|
@ -11,22 +11,26 @@ use Shlinkio\Shlink\Core\Entity\Visit;
|
|||
class VisitRepository extends EntityRepository implements VisitRepositoryInterface
|
||||
{
|
||||
/**
|
||||
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
|
||||
* smaller blocks of a specific size.
|
||||
* This will have side effects if you update those rows while you iterate them.
|
||||
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
|
||||
* dataset
|
||||
*
|
||||
* @return iterable|Visit[]
|
||||
*/
|
||||
public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable
|
||||
public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable
|
||||
{
|
||||
$dql = <<<DQL
|
||||
SELECT v FROM Shlinkio\Shlink\Core\Entity\Visit AS v WHERE v.visitLocation IS NULL
|
||||
DQL;
|
||||
$query = $this->getEntityManager()->createQuery($dql);
|
||||
$query = $this->getEntityManager()->createQuery($dql)
|
||||
->setMaxResults($blockSize);
|
||||
$remainingVisitsToProcess = $this->count(['visitLocation' => null]);
|
||||
$offset = 0;
|
||||
|
||||
while ($remainingVisitsToProcess > 0) {
|
||||
$iterator = $query->setMaxResults($blockSize)
|
||||
->setFirstResult($offset)
|
||||
->iterate();
|
||||
|
||||
$iterator = $query->setFirstResult($applyOffset ? $offset : null)->iterate();
|
||||
foreach ($iterator as $key => [$value]) {
|
||||
yield $key => $value;
|
||||
}
|
||||
|
|
|
@ -12,9 +12,15 @@ interface VisitRepositoryInterface extends ObjectRepository
|
|||
public const DEFAULT_BLOCK_SIZE = 10000;
|
||||
|
||||
/**
|
||||
* This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in
|
||||
* smaller blocks of a specific size.
|
||||
* This will have side effects if you update those rows while you iterate them.
|
||||
* If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the
|
||||
* dataset
|
||||
*
|
||||
* @return iterable|Visit[]
|
||||
*/
|
||||
public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable;
|
||||
public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable;
|
||||
|
||||
/**
|
||||
* @return Visit[]
|
||||
|
|
|
@ -24,9 +24,12 @@ class VisitService implements VisitServiceInterface
|
|||
{
|
||||
/** @var VisitRepository $repo */
|
||||
$repo = $this->em->getRepository(Visit::class);
|
||||
$results = $repo->findUnlocatedVisits();
|
||||
$results = $repo->findUnlocatedVisits(false);
|
||||
$count = 0;
|
||||
$persistBlock = 200;
|
||||
|
||||
foreach ($results as $visit) {
|
||||
$count++;
|
||||
try {
|
||||
/** @var Location $location */
|
||||
$location = $geolocateVisit($visit);
|
||||
|
@ -37,20 +40,25 @@ class VisitService implements VisitServiceInterface
|
|||
|
||||
$location = new VisitLocation($location);
|
||||
$this->locateVisit($visit, $location, $notifyVisitWithLocation);
|
||||
|
||||
// Flush and clear after X iterations
|
||||
if ($count % $persistBlock === 0) {
|
||||
$this->em->flush();
|
||||
$this->em->clear();
|
||||
}
|
||||
}
|
||||
|
||||
$this->em->flush();
|
||||
$this->em->clear();
|
||||
}
|
||||
|
||||
private function locateVisit(Visit $visit, VisitLocation $location, ?callable $notifyVisitWithLocation): void
|
||||
{
|
||||
$visit->locate($location);
|
||||
|
||||
$this->em->persist($visit);
|
||||
$this->em->flush();
|
||||
|
||||
if ($notifyVisitWithLocation !== null) {
|
||||
$notifyVisitWithLocation($location, $visit);
|
||||
}
|
||||
|
||||
$this->em->clear();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ class VisitRepositoryTest extends DatabaseTestCase
|
|||
$this->getEntityManager()->flush();
|
||||
|
||||
$resultsCount = 0;
|
||||
$results = $this->repo->findUnlocatedVisits($blockSize);
|
||||
$results = $this->repo->findUnlocatedVisits(true, $blockSize);
|
||||
foreach ($results as $value) {
|
||||
$resultsCount++;
|
||||
}
|
||||
|
|
|
@ -17,7 +17,11 @@ use Shlinkio\Shlink\Core\Repository\VisitRepository;
|
|||
use Shlinkio\Shlink\Core\Service\VisitService;
|
||||
use function array_shift;
|
||||
use function count;
|
||||
use function floor;
|
||||
use function func_get_args;
|
||||
use function Functional\map;
|
||||
use function range;
|
||||
use function sprintf;
|
||||
|
||||
class VisitServiceTest extends TestCase
|
||||
{
|
||||
|
@ -35,13 +39,12 @@ class VisitServiceTest extends TestCase
|
|||
/** @test */
|
||||
public function locateVisitsIteratesAndLocatesUnlocatedVisits(): void
|
||||
{
|
||||
$unlocatedVisits = [
|
||||
new Visit(new ShortUrl('foo'), Visitor::emptyInstance()),
|
||||
new Visit(new ShortUrl('bar'), Visitor::emptyInstance()),
|
||||
];
|
||||
$unlocatedVisits = map(range(1, 200), function (int $i) {
|
||||
return new Visit(new ShortUrl(sprintf('short_code_%s', $i)), Visitor::emptyInstance());
|
||||
});
|
||||
|
||||
$repo = $this->prophesize(VisitRepository::class);
|
||||
$findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits);
|
||||
$findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits);
|
||||
$getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal());
|
||||
|
||||
$persist = $this->em->persist(Argument::type(Visit::class))->will(function () {
|
||||
|
@ -63,19 +66,19 @@ class VisitServiceTest extends TestCase
|
|||
$findUnlocatedVisits->shouldHaveBeenCalledOnce();
|
||||
$getRepo->shouldHaveBeenCalledOnce();
|
||||
$persist->shouldHaveBeenCalledTimes(count($unlocatedVisits));
|
||||
$flush->shouldHaveBeenCalledTimes(count($unlocatedVisits));
|
||||
$clear->shouldHaveBeenCalledTimes(count($unlocatedVisits));
|
||||
$flush->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1);
|
||||
$clear->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1);
|
||||
}
|
||||
|
||||
/** @test */
|
||||
public function visitsWhichCannotBeLocatedAreIgnored()
|
||||
public function visitsWhichCannotBeLocatedAreIgnored(): void
|
||||
{
|
||||
$unlocatedVisits = [
|
||||
new Visit(new ShortUrl('foo'), Visitor::emptyInstance()),
|
||||
];
|
||||
|
||||
$repo = $this->prophesize(VisitRepository::class);
|
||||
$findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits);
|
||||
$findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits);
|
||||
$getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal());
|
||||
|
||||
$persist = $this->em->persist(Argument::type(Visit::class))->will(function () {
|
||||
|
@ -92,7 +95,7 @@ class VisitServiceTest extends TestCase
|
|||
$findUnlocatedVisits->shouldHaveBeenCalledOnce();
|
||||
$getRepo->shouldHaveBeenCalledOnce();
|
||||
$persist->shouldNotHaveBeenCalled();
|
||||
$flush->shouldNotHaveBeenCalled();
|
||||
$clear->shouldNotHaveBeenCalled();
|
||||
$flush->shouldHaveBeenCalledOnce();
|
||||
$clear->shouldHaveBeenCalledOnce();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue