From 26f4a969c905e4570b7ec9a6209cae128777bd57 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Fri, 31 Mar 2023 09:46:05 +0200 Subject: [PATCH] Fix memory leak when importing big amounts of visits --- docker/config/php.ini | 2 +- .../src/Importer/ImportedLinksProcessor.php | 9 +++--- .../Core/src/Importer/ShortUrlImporting.php | 14 ++++++++-- .../Repository/CrawlableShortCodesQuery.php | 3 +- module/Core/src/Util/DoctrineBatchHelper.php | 5 ++-- module/Core/test-api/Action/RobotsTest.php | 2 +- .../Importer/ImportedLinksProcessorTest.php | 28 +++++++++++++++++++ 7 files changed, 52 insertions(+), 11 deletions(-) diff --git a/docker/config/php.ini b/docker/config/php.ini index fca44924..248e508d 100644 --- a/docker/config/php.ini +++ b/docker/config/php.ini @@ -1,4 +1,4 @@ log_errors_max_len=0 zend.assertions=1 assert.exception=1 -memory_limit=256M +memory_limit=512M diff --git a/module/Core/src/Importer/ImportedLinksProcessor.php b/module/Core/src/Importer/ImportedLinksProcessor.php index 7248e0d3..ef7633a7 100644 --- a/module/Core/src/Importer/ImportedLinksProcessor.php +++ b/module/Core/src/Importer/ImportedLinksProcessor.php @@ -18,7 +18,6 @@ use Shlinkio\Shlink\Importer\Model\ImportedShlinkOrphanVisit; use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl; use Shlinkio\Shlink\Importer\Model\ImportResult; use Shlinkio\Shlink\Importer\Params\ImportParams; -use Shlinkio\Shlink\Importer\Sources\ImportSource; use Symfony\Component\Console\Style\OutputStyle; use Symfony\Component\Console\Style\StyleInterface; use Throwable; @@ -55,8 +54,7 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface private function importShortUrls(StyleInterface $io, iterable $shlinkUrls, ImportParams $params): void { $importShortCodes = $params->importShortCodes; - $source = $params->source; - $iterable = $this->batchHelper->wrapIterable($shlinkUrls, $source === ImportSource::SHLINK ? 10 : 100); + $iterable = $this->batchHelper->wrapIterable($shlinkUrls, $params->importVisits ? 10 : 100); foreach ($iterable as $importedUrl) { $skipOnShortCodeConflict = static fn (): bool => $io->choice(sprintf( @@ -82,7 +80,10 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface continue; } - $resultMessage = $shortUrlImporting->importVisits($importedUrl->visits, $this->em); + $resultMessage = $shortUrlImporting->importVisits( + $this->batchHelper->wrapIterable($importedUrl->visits, 100), + $this->em, + ); $io->text(sprintf('%s: %s', $longUrl, $resultMessage)); } } diff --git a/module/Core/src/Importer/ShortUrlImporting.php b/module/Core/src/Importer/ShortUrlImporting.php index 0165c7c3..28c22a24 100644 --- a/module/Core/src/Importer/ShortUrlImporting.php +++ b/module/Core/src/Importer/ShortUrlImporting.php @@ -33,7 +33,7 @@ final class ShortUrlImporting */ public function importVisits(iterable $visits, EntityManagerInterface $em): string { - $mostRecentImportedDate = $this->shortUrl->mostRecentImportedVisitDate(); + $mostRecentImportedDate = $this->resolveShortUrl($em)->mostRecentImportedVisitDate(); $importedVisits = 0; foreach ($visits as $importedVisit) { @@ -42,7 +42,7 @@ final class ShortUrlImporting continue; } - $em->persist(Visit::fromImport($this->shortUrl, $importedVisit)); + $em->persist(Visit::fromImport($this->resolveShortUrl($em), $importedVisit)); $importedVisits++; } @@ -54,4 +54,14 @@ final class ShortUrlImporting ? sprintf('Imported with %s visits', $importedVisits) : sprintf('Skipped. Imported %s visits', $importedVisits); } + + private function resolveShortUrl(EntityManagerInterface $em): ShortUrl + { + // Instead of directly accessing wrapped ShortUrl entity, try to get it from the EM. + // With this, we will get the same entity from memory if it is known by the EM, but if it was cleared, the EM + // will fetch it again from the database, preventing errors at runtime. + // However, if the EM was not flushed yet, the entity will not be found by ID, but it is known by the EM. + // In that case, we fall back to wrapped ShortUrl entity directly. + return $em->find(ShortUrl::class, $this->shortUrl->getId()) ?? $this->shortUrl; + } } diff --git a/module/Core/src/ShortUrl/Repository/CrawlableShortCodesQuery.php b/module/Core/src/ShortUrl/Repository/CrawlableShortCodesQuery.php index 7b3821d8..89aef69e 100644 --- a/module/Core/src/ShortUrl/Repository/CrawlableShortCodesQuery.php +++ b/module/Core/src/ShortUrl/Repository/CrawlableShortCodesQuery.php @@ -20,7 +20,8 @@ class CrawlableShortCodesQuery extends EntitySpecificationRepository implements ->from(ShortUrl::class, 's') ->where($qb->expr()->eq('s.crawlable', ':crawlable')) ->setParameter('crawlable', true) - ->setMaxResults($blockSize); + ->setMaxResults($blockSize) + ->orderBy('s.shortCode'); $page = 0; do { diff --git a/module/Core/src/Util/DoctrineBatchHelper.php b/module/Core/src/Util/DoctrineBatchHelper.php index 5591ddb2..93d2d022 100644 --- a/module/Core/src/Util/DoctrineBatchHelper.php +++ b/module/Core/src/Util/DoctrineBatchHelper.php @@ -17,12 +17,14 @@ class DoctrineBatchHelper implements DoctrineBatchHelperInterface } /** + * @template T + * @param iterable $resultSet + * @return iterable * @throws Throwable */ public function wrapIterable(iterable $resultSet, int $batchSize): iterable { $iteration = 0; - $this->em->beginTransaction(); try { @@ -33,7 +35,6 @@ class DoctrineBatchHelper implements DoctrineBatchHelperInterface } } catch (Throwable $e) { $this->em->rollback(); - throw $e; } diff --git a/module/Core/test-api/Action/RobotsTest.php b/module/Core/test-api/Action/RobotsTest.php index 490c82fb..9b550fe3 100644 --- a/module/Core/test-api/Action/RobotsTest.php +++ b/module/Core/test-api/Action/RobotsTest.php @@ -22,8 +22,8 @@ class RobotsTest extends ApiTestCase # https://www.robotstxt.org/orig.html User-agent: * - Allow: /custom Allow: /abc123 + Allow: /custom Disallow: / ROBOTS, $body, diff --git a/module/Core/test/Importer/ImportedLinksProcessorTest.php b/module/Core/test/Importer/ImportedLinksProcessorTest.php index 95f7a7f8..ff8eebc6 100644 --- a/module/Core/test/Importer/ImportedLinksProcessorTest.php +++ b/module/Core/test/Importer/ImportedLinksProcessorTest.php @@ -7,6 +7,7 @@ namespace ShlinkioTest\Shlink\Core\Importer; use Cake\Chronos\Chronos; use Doctrine\Common\Collections\ArrayCollection; use Doctrine\ORM\EntityManagerInterface; +use PHPUnit\Framework\Assert; use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\MockObject\MockObject; @@ -184,6 +185,7 @@ class ImportedLinksProcessorTest extends TestCase $this->em->expects($this->exactly($amountOfPersistedVisits + ($foundShortUrl === null ? 1 : 0)))->method( 'persist', )->with($this->callback(fn (object $arg) => $arg instanceof ShortUrl || $arg instanceof Visit)); + $this->em->expects($this->any())->method('find')->willReturn(null); $this->io->expects($this->once())->method('text')->with($this->stringContains($expectedOutput)); $this->processor->process($this->io, ImportResult::withShortUrls([$importedUrl]), $this->buildParams()); @@ -229,6 +231,32 @@ class ImportedLinksProcessorTest extends TestCase ]; } + #[Test, DataProvider('provideFoundShortUrls')] + public function visitsArePersistedWithProperShortUrl(?ShortUrl $foundShortUrl): void + { + $originalShortUrl = ShortUrl::withLongUrl('https://foo'); + + $this->em->method('getRepository')->with(ShortUrl::class)->willReturn($this->repo); + $this->repo->expects($this->once())->method('findOneByImportedUrl')->willReturn($originalShortUrl); + $this->em->expects($this->exactly(2))->method('find')->willReturn($foundShortUrl); + $this->em->expects($this->once())->method('persist')->willReturnCallback( + static fn (Visit $visit) => Assert::assertSame($foundShortUrl ?? $originalShortUrl, $visit->getShortUrl()), + ); + + $now = Chronos::now(); + $this->processor->process($this->io, ImportResult::withShortUrls([ + new ImportedShlinkUrl(ImportSource::SHLINK, 'https://s', [], $now, null, 's', null, [ + new ImportedShlinkVisit('', '', $now, null), + ]), + ]), $this->buildParams()); + } + + public static function provideFoundShortUrls(): iterable + { + yield [null]; + yield [ShortUrl::withLongUrl('https://bar')]; + } + /** * @param iterable $visits */